2009-02-17 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / profiler / mono-profiler-logging.c
index c04e9cad7eb454822d953c3c15e747889247b78a..7aa8c5f6b2b31df1e5b300a3ef6aaa48b4ba3d94 100644 (file)
@@ -1,6 +1,7 @@
 #include <config.h>
 #include <mono/metadata/profiler.h>
 #include <mono/metadata/class.h>
+#include <mono/metadata/class-internals.h>
 #include <mono/metadata/assembly.h>
 #include <mono/metadata/loader.h>
 #include <mono/metadata/threads.h>
@@ -32,9 +33,20 @@ typedef enum {
        MONO_PROFILER_FILE_BLOCK_KIND_UNLOADED = 5,
        MONO_PROFILER_FILE_BLOCK_KIND_EVENTS = 6,
        MONO_PROFILER_FILE_BLOCK_KIND_STATISTICAL = 7,
-       MONO_PROFILER_FILE_BLOCK_KIND_HEAP = 8
+       MONO_PROFILER_FILE_BLOCK_KIND_HEAP_DATA = 8,
+       MONO_PROFILER_FILE_BLOCK_KIND_HEAP_SUMMARY = 9,
+       MONO_PROFILER_FILE_BLOCK_KIND_DIRECTIVES = 10
 } MonoProfilerFileBlockKind;
 
+/*
+ * Directive codes.
+ * NOTE(review): presumably these are the entries of a
+ * MONO_PROFILER_FILE_BLOCK_KIND_DIRECTIVES block, with END closing the list;
+ * confirm against the code that writes that block.
+ */
+typedef enum {
+       MONO_PROFILER_DIRECTIVE_END = 0,
+       MONO_PROFILER_DIRECTIVE_ALLOCATIONS_CARRY_CALLER = 1,
+       MONO_PROFILER_DIRECTIVE_ALLOCATIONS_HAVE_STACK = 2,
+       MONO_PROFILER_DIRECTIVE_ALLOCATIONS_CARRY_ID = 3,
+       MONO_PROFILER_DIRECTIVE_LAST /* No explicit value: one more than CARRY_ID, i.e. 4. */
+} MonoProfilerDirectives;
+
+
 #define MONO_PROFILER_LOADED_EVENT_MODULE     1
 #define MONO_PROFILER_LOADED_EVENT_ASSEMBLY   2
 #define MONO_PROFILER_LOADED_EVENT_APPDOMAIN  4
@@ -53,19 +65,20 @@ typedef struct _ProfilerEventData {
                gsize number;
        } data;
        unsigned int data_type:2;
-       unsigned int code:3;
+       unsigned int code:4;
        unsigned int kind:1;
-       unsigned int value:26;
+       unsigned int value:25;
 } ProfilerEventData;
 
-#define EXTENDED_EVENT_VALUE_SHIFT (26)
-#define MAX_EVENT_VALUE ((1<<EXTENDED_EVENT_VALUE_SHIFT)-1)
-#define MAX_EXTENDED_EVENT_VALUE ((((guint64))MAX_EVENT_VALUE<<32)|((guint64)0xffffffff))
+#define EVENT_VALUE_BITS (25)
+#define MAX_EVENT_VALUE ((1<<EVENT_VALUE_BITS)-1)
 
 typedef enum {
        MONO_PROFILER_EVENT_METHOD_JIT = 0,
        MONO_PROFILER_EVENT_METHOD_FREED = 1,
-       MONO_PROFILER_EVENT_METHOD_CALL = 2
+       MONO_PROFILER_EVENT_METHOD_CALL = 2,
+       MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER = 3,
+       MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER = 4
 } MonoProfilerMethodEvents;
 typedef enum {
        MONO_PROFILER_EVENT_CLASS_LOAD = 0,
@@ -85,7 +98,10 @@ typedef enum {
        MONO_PROFILER_EVENT_GC_SWEEP = 4,
        MONO_PROFILER_EVENT_GC_RESIZE = 5,
        MONO_PROFILER_EVENT_GC_STOP_WORLD = 6,
-       MONO_PROFILER_EVENT_GC_START_WORLD = 7
+       MONO_PROFILER_EVENT_GC_START_WORLD = 7,
+       MONO_PROFILER_EVENT_JIT_TIME_ALLOCATION = 8,
+       MONO_PROFILER_EVENT_STACK_SECTION = 9,
+       MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID = 10
 } MonoProfilerEvents;
 typedef enum {
        MONO_PROFILER_EVENT_KIND_START = 0,
@@ -97,17 +113,130 @@ typedef enum {
        gettimeofday (&current_time, NULL);\
        (t) = (((guint64)current_time.tv_sec) * 1000000) + current_time.tv_usec;\
 } while (0)
-#if 1
-#define MONO_PROFILER_GET_CURRENT_COUNTER(c) MONO_PROFILER_GET_CURRENT_TIME ((c));
+
+static gboolean use_fast_timer = FALSE;
+
+#if (defined(__i386__) || defined(__x86_64__)) && ! defined(PLATFORM_WIN32)
+
+#if defined(__i386__)
+/*
+ * Raw i386 machine code for a routine with the CpuidFunc signature.
+ * It loads the first stack argument into EAX, executes CPUID and stores
+ * EAX/EBX/ECX/EDX through the four pointer arguments (found at 0xc, 0x10,
+ * 0x14 and 0x18 from EBP). EBX is pushed on entry and popped before
+ * returning. cpuid () casts this array to a function pointer and calls it.
+ */
+static const guchar cpuid_impl [] = {
+       0x55,                           /* push   %ebp */
+       0x89, 0xe5,                     /* mov    %esp,%ebp */
+       0x53,                           /* push   %ebx */
+       0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
+       0x0f, 0xa2,                     /* cpuid   */
+       0x50,                           /* push   %eax */
+       0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
+       0x89, 0x18,                     /* mov    %ebx,(%eax) */
+       0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
+       0x89, 0x08,                     /* mov    %ecx,(%eax) */
+       0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
+       0x89, 0x10,                     /* mov    %edx,(%eax) */
+       0x58,                           /* pop    %eax */
+       0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
+       0x89, 0x02,                     /* mov    %eax,(%edx) */
+       0x5b,                           /* pop    %ebx */
+       0xc9,                           /* leave   */
+       0xc3,                           /* ret     */
+};
+
+/* Signature of the machine-code routine stored in cpuid_impl. */
+typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
+
+/*
+ * Run CPUID with `id` in EAX and store the resulting registers through the
+ * four pointers.
+ * Returns 1 when the instruction was executed and 0 when the probe below
+ * says it is not available; in that case the output pointers are not written.
+ */
+static int 
+cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx) {
+       int have_cpuid = 0;
+       /*
+        * Availability probe: flip bit 21 (0x200000) of EFLAGS, read the flags
+        * back and keep only that bit of the difference. have_cpuid ends up
+        * non-zero only if the bit could be changed.
+        */
+#ifndef _MSC_VER
+       __asm__  __volatile__ (
+               "pushfl\n"
+               "popl %%eax\n"
+               "movl %%eax, %%edx\n"
+               "xorl $0x200000, %%eax\n"
+               "pushl %%eax\n"
+               "popfl\n"
+               "pushfl\n"
+               "popl %%eax\n"
+               "xorl %%edx, %%eax\n"
+               "andl $0x200000, %%eax\n"
+               "movl %%eax, %0"
+               : "=r" (have_cpuid)
+               :
+               : "%eax", "%edx"
+       );
 #else
+       __asm {
+               pushfd
+               pop eax
+               mov edx, eax
+               xor eax, 0x200000
+               push eax
+               popfd
+               pushfd
+               pop eax
+               xor eax, edx
+               and eax, 0x200000
+               mov have_cpuid, eax
+       }
+#endif
+       if (have_cpuid) {
+               /* The instruction itself lives in the cpuid_impl byte array. */
+               CpuidFunc func = (CpuidFunc) cpuid_impl;
+               func (id, p_eax, p_ebx, p_ecx, p_edx);
+               /*
+                * We use this approach because of issues with gcc and pic code, see:
+                * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
+               __asm__ __volatile__ ("cpuid"
+                       : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
+                       : "a" (id));
+               */
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * i386 variant: the rdtsc-based counter is enabled only when cpuid () succeeds
+ * and bit 4 (0x10) of the EDX value for leaf 1 is set. In every other case
+ * use_fast_timer is cleared.
+ */
+static void detect_fast_timer (void) {
+       int eax_value, ebx_value, ecx_value, edx_value;
+
+       if (! cpuid (0x1, &eax_value, &ebx_value, &ecx_value, &edx_value)) {
+               use_fast_timer = FALSE;
+               return;
+       }
+       use_fast_timer = (edx_value & 0x10) ? TRUE : FALSE;
+}
+#endif
+
+#if defined(__x86_64__)
+/*
+ * x86_64 variant: executes CPUID leaf 1 directly (no availability probe, unlike
+ * the i386 variant) and sets use_fast_timer from bit 4 (0x10) of EDX.
+ */
+static void detect_fast_timer (void) {
+       guint32 leaf = 0x1;
+       guint32 out_a, out_b, out_c, out_d;
+
+       __asm__ __volatile__ ("cpuid" : "=a"(out_a), "=b"(out_b), "=c"(out_c), "=d"(out_d) : "a"(leaf));
+       use_fast_timer = (out_d & 0x10) ? TRUE : FALSE;
+}
+#endif
+
 static __inline__ guint64 rdtsc(void) {
        guint32 hi, lo;
        __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
        return ((guint64) lo) | (((guint64) hi) << 32);
 }
 #define MONO_PROFILER_GET_CURRENT_COUNTER(c) {\
-       (c) = rdtsc ();\
+       if (use_fast_timer) {\
+               (c) = rdtsc ();\
+       } else {\
+               MONO_PROFILER_GET_CURRENT_TIME ((c));\
+       }\
 } while (0)
+#else
+/* Fallback for builds without the rdtsc path: the fast timer is never selected. */
+static void
+detect_fast_timer (void)
+{
+       use_fast_timer = FALSE;
+}
+#define MONO_PROFILER_GET_CURRENT_COUNTER(c) MONO_PROFILER_GET_CURRENT_TIME ((c))
 #endif
 
 
@@ -208,6 +337,22 @@ typedef struct _ProfilerHeapShotWriteBuffer {
        gpointer buffer [PROFILER_HEAP_SHOT_WRITE_BUFFER_SIZE];
 } ProfilerHeapShotWriteBuffer;
 
+/*
+ * Two pairs of 32 bit counters (instances and bytes), one pair for reachable
+ * and one for unreachable objects.
+ * NOTE(review): presumably one of these is kept per class in a heap-shot
+ * summary (see per_class_data in ProfilerHeapShotCollectionSummary); confirm.
+ */
+typedef struct _ProfilerHeapShotClassSummary {
+       struct {
+               guint32 instances;
+               guint32 bytes;
+       } reachable;
+       struct {
+               guint32 instances;
+               guint32 bytes;
+       } unreachable;
+} ProfilerHeapShotClassSummary;
+
+/*
+ * Pointer to ProfilerHeapShotClassSummary entries plus a capacity.
+ * NOTE(review): presumably capacity is the number of entries allocated in
+ * per_class_data and the array is indexed by class id; confirm at the
+ * allocation site.
+ */
+typedef struct _ProfilerHeapShotCollectionSummary {
+       ProfilerHeapShotClassSummary *per_class_data;
+       guint32 capacity;
+} ProfilerHeapShotCollectionSummary;
+
 typedef struct _ProfilerHeapShotWriteJob {
        struct _ProfilerHeapShotWriteJob *next;
        struct _ProfilerHeapShotWriteJob *next_unwritten;
@@ -222,8 +367,21 @@ typedef struct _ProfilerHeapShotWriteJob {
        guint64 start_time;
        guint64 end_counter;
        guint64 end_time;
+       guint32 collection;
+       ProfilerHeapShotCollectionSummary summary;
+       gboolean dump_heap_data;
 } ProfilerHeapShotWriteJob;
 
+/*
+ * Per-thread stack of methods, stored as three parallel arrays that are
+ * allocated, grown and freed together by the thread_stack_* functions.
+ */
+typedef struct _ProfilerThreadStack {
+       guint32 capacity; /* Number of entries allocated in each array. */
+       guint32 top; /* Number of frames pushed; also the index of the next free slot. */
+       guint32 last_saved_top; /* Lowered to `top` by thread_stack_pop (); frames above it count as unsaved. */
+       guint32 last_written_frame; /* Only read and written through the get/set accessors. */
+       MonoMethod **stack; /* Method of each frame. */
+       guint8 *method_is_jitted; /* Flag recorded with each frame by thread_stack_push_jitted (). */
+       guint32 *written_frames; /* Value recorded per index by thread_stack_write_frame_at_index (). */
+} ProfilerThreadStack;
+
 typedef struct _ProfilerPerThreadData {
        ProfilerEventData *events;
        ProfilerEventData *next_free_event;
@@ -234,16 +392,32 @@ typedef struct _ProfilerPerThreadData {
        guint64 last_event_counter;
        gsize thread_id;
        ProfilerHeapShotObjectBuffer *heap_shot_object_buffers;
+       ProfilerThreadStack stack;
        struct _ProfilerPerThreadData* next;
 } ProfilerPerThreadData;
 
+typedef struct _ProfilerStatisticalHit {
+       gpointer *address;
+       MonoDomain *domain;
+} ProfilerStatisticalHit;
+
 typedef struct _ProfilerStatisticalData {
-       gpointer *addresses;
-       int next_free_index;
-       int end_index;
-       int first_unwritten_index;
+       ProfilerStatisticalHit *hits;
+       unsigned int next_free_index;
+       unsigned int end_index;
+       unsigned int first_unwritten_index;
 } ProfilerStatisticalData;
 
+typedef struct _ProfilerUnmanagedSymbol {
+       guint32 offset;
+       guint32 size;
+       guint32 id;
+       guint32 index;
+} ProfilerUnmanagedSymbol;
+
+struct _ProfilerExecutableFile;
+struct _ProfilerExecutableFileSectionRegion;
+
 typedef struct _ProfilerExecutableMemoryRegionData {
        gpointer start;
        gpointer end;
@@ -251,6 +425,12 @@ typedef struct _ProfilerExecutableMemoryRegionData {
        char *file_name;
        guint32 id;
        gboolean is_new;
+       
+       struct _ProfilerExecutableFile *file;
+       struct _ProfilerExecutableFileSectionRegion *file_region_reference;
+       guint32 symbols_count;
+       guint32 symbols_capacity;
+       ProfilerUnmanagedSymbol *symbols;
 } ProfilerExecutableMemoryRegionData;
 
 typedef struct _ProfilerExecutableMemoryRegions {
@@ -258,22 +438,186 @@ typedef struct _ProfilerExecutableMemoryRegions {
        guint32 regions_capacity;
        guint32 regions_count;
        guint32 next_id;
+       guint32 next_unmanaged_function_id;
 } ProfilerExecutableMemoryRegions;
 
-typedef struct _ProfilerUnmanagedFunction {
-       guint32 id;
-       guint32 hits;
-       char *name;
-       struct _ProfilerUnmanagedFunction *next_unwritten;
-} ProfilerUnmanagedFunction;
+/* Start of ELF definitions */
+#define EI_NIDENT 16
+typedef guint16 ElfHalf;
+typedef guint32 ElfWord;
+typedef gsize ElfAddr;
+typedef gsize ElfOff;
+
+typedef struct {
+       unsigned char e_ident[EI_NIDENT];
+       ElfHalf e_type;
+       ElfHalf e_machine;
+       ElfWord e_version;
+       ElfAddr e_entry;
+       ElfOff  e_phoff;
+       ElfOff  e_shoff; // Section header table
+       ElfWord e_flags;
+       ElfHalf e_ehsize; // Header size
+       ElfHalf e_phentsize;
+       ElfHalf e_phnum;
+       ElfHalf e_shentsize; // Section header entry size
+       ElfHalf e_shnum; // Section header entries number
+       ElfHalf e_shstrndx; // String table index
+} ElfHeader;
+
+/*
+ * Section header and symbol table entry layouts, selected by pointer size.
+ * Field order and field widths differ between the two variants.
+ * NOTE(review): these look like the ELF32 / ELF64 section header and symbol
+ * layouts; confirm against the ELF specification before changing them.
+ */
+#if (SIZEOF_VOID_P == 4)
+typedef struct {
+       ElfWord sh_name;
+       ElfWord sh_type;
+       ElfWord sh_flags;
+       ElfAddr sh_addr; // Address in memory
+       ElfOff  sh_offset; // Offset in file
+       ElfWord sh_size;
+       ElfWord sh_link;
+       ElfWord sh_info;
+       ElfWord sh_addralign;
+       ElfWord sh_entsize;
+} ElfSection;
+typedef struct {
+       ElfWord       st_name;
+       ElfAddr       st_value;
+       ElfWord       st_size;
+       unsigned char st_info; // Use ELF_ST_TYPE to get symbol type
+       unsigned char st_other;
+       ElfHalf       st_shndx; // Or one of ELF_SHN_ABS, ELF_SHN_COMMON or ELF_SHN_UNDEF.
+} ElfSymbol;
+#elif (SIZEOF_VOID_P == 8)
+typedef struct {
+       ElfWord sh_name;
+       ElfWord sh_type;
+       ElfOff sh_flags;
+       ElfAddr sh_addr; // Address in memory
+       ElfOff  sh_offset; // Offset in file
+       ElfOff sh_size;
+       ElfWord sh_link;
+       ElfWord sh_info;
+       ElfOff sh_addralign;
+       ElfOff sh_entsize;
+} ElfSection;
+typedef struct {
+       ElfWord       st_name;
+       unsigned char st_info; // Use ELF_ST_TYPE to get symbol type
+       unsigned char st_other;
+       ElfHalf       st_shndx; // Or one of ELF_SHN_ABS, ELF_SHN_COMMON or ELF_SHN_UNDEF.
+       ElfAddr       st_value;
+       ElfAddr       st_size;
+} ElfSymbol;
+#else
+#error Bad size of void pointer
+#endif
+
+
+#define ELF_ST_BIND(i)   ((i)>>4)
+#define ELF_ST_TYPE(i)   ((i)&0xf)
+
+
+typedef enum {
+       EI_MAG0 = 0,
+       EI_MAG1 = 1,
+       EI_MAG2 = 2,
+       EI_MAG3 = 3,
+       EI_CLASS = 4,
+       EI_DATA = 5
+} ElfIdentFields;
+
+typedef enum {
+       ELF_FILE_TYPE_NONE = 0,
+       ELF_FILE_TYPE_REL = 1,
+       ELF_FILE_TYPE_EXEC = 2,
+       ELF_FILE_TYPE_DYN = 3,
+       ELF_FILE_TYPE_CORE = 4
+} ElfFileType;
+
+typedef enum {
+       ELF_CLASS_NONE = 0,
+       ELF_CLASS_32 = 1,
+       ELF_CLASS_64 = 2
+} ElfIdentClass;
+
+typedef enum {
+       ELF_DATA_NONE = 0,
+       ELF_DATA_LSB = 1,
+       ELF_DATA_MSB = 2
+} ElfIdentData;
+
+typedef enum {
+       ELF_SHT_NULL = 0,
+       ELF_SHT_PROGBITS = 1,
+       ELF_SHT_SYMTAB = 2,
+       ELF_SHT_STRTAB = 3,
+       ELF_SHT_RELA = 4,
+       ELF_SHT_HASH = 5,
+       ELF_SHT_DYNAMIC = 6,
+       ELF_SHT_NOTE = 7,
+       ELF_SHT_NOBITS = 8,
+       ELF_SHT_REL = 9,
+       ELF_SHT_SHLIB = 10,
+       ELF_SHT_DYNSYM = 11
+} ElfSectionType;
+
+typedef enum {
+       ELF_STT_NOTYPE = 0,
+       ELF_STT_OBJECT = 1,
+       ELF_STT_FUNC = 2,
+       ELF_STT_SECTION = 3,
+       ELF_STT_FILE = 4
+} ElfSymbolType;
+
+typedef enum {
+       ELF_SHF_WRITE = 1,
+       ELF_SHF_ALLOC = 2,
+       ELF_SHF_EXECINSTR = 4,
+} ElfSectionFlags;
+
+#define ELF_SHN_UNDEF       0
+#define ELF_SHN_LORESERVE   0xff00
+#define ELF_SHN_LOPROC      0xff00
+#define ELF_SHN_HIPROC      0xff1f
+#define ELF_SHN_ABS         0xfff1
+#define ELF_SHN_COMMON      0xfff2
+#define ELF_SHN_HIRESERVE   0xffff
+/* End of ELF definitions */
+
+typedef struct _ProfilerExecutableFileSectionRegion {
+       ProfilerExecutableMemoryRegionData *region;
+       guint8 *section_address;
+       gsize section_offset;
+} ProfilerExecutableFileSectionRegion;
+
+typedef struct _ProfilerExecutableFile {
+       guint32 reference_count;
+       
+       /* Used for mmap and munmap */
+       int fd;
+       guint8 *data;
+       size_t length;
+       
+       /* File data */
+       ElfHeader *header;
+       guint8 *symbols_start;
+       guint32 symbols_count;
+       guint32 symbol_size;
+       const char *symbols_string_table;
+       const char *main_string_table;
+       
+       ProfilerExecutableFileSectionRegion *section_regions;
+       
+       struct _ProfilerExecutableFile *next_new_file;
+} ProfilerExecutableFile;
 
-typedef struct _ProfilerUnmanagedFunctions {
+typedef struct _ProfilerExecutableFiles {
        GHashTable *table;
-       ProfilerUnmanagedFunction *unwritten_queue;
-       ProfilerUnmanagedFunction *unwritten_queue_end;
-       guint32 next_id;
-       ProfilerUnmanagedFunction actual_unwritten_queue_end;
-} ProfilerUnmanagedFunctions;
+       ProfilerExecutableFile *new_files;
+} ProfilerExecutableFiles;
+
+
+/*
+ * Writer thread liveness flag, kept in the file-level `profiler` object.
+ * CLEANUP_WRITER_THREAD records that the writer thread has terminated;
+ * CHECK_WRITER_THREAD is true as long as that has not been recorded.
+ */
+#define CLEANUP_WRITER_THREAD() do {profiler->writer_thread_terminated = TRUE;} while (0)
+#define CHECK_WRITER_THREAD() (! profiler->writer_thread_terminated)
 
 #ifndef PLATFORM_WIN32
 #include <sys/types.h>
@@ -284,6 +628,12 @@ typedef struct _ProfilerUnmanagedFunctions {
 #include <pthread.h>
 #include <semaphore.h>
 
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+
 #define MUTEX_TYPE pthread_mutex_t
 #define INITIALIZE_PROFILER_MUTEX() pthread_mutex_init (&(profiler->mutex), NULL)
 #define DELETE_PROFILER_MUTEX() pthread_mutex_destroy (&(profiler->mutex))
@@ -293,7 +643,11 @@ typedef struct _ProfilerUnmanagedFunctions {
 #define THREAD_TYPE pthread_t
 #define CREATE_WRITER_THREAD(f) pthread_create (&(profiler->data_writer_thread), NULL, ((void*(*)(void*))f), NULL)
 #define EXIT_THREAD() pthread_exit (NULL);
-#define WAIT_WRITER_THREAD() pthread_join (profiler->data_writer_thread, NULL)
+/* Joins the writer thread, but only while CHECK_WRITER_THREAD () says it has not terminated. */
+#define WAIT_WRITER_THREAD() do {\
+       if (CHECK_WRITER_THREAD ()) {\
+               pthread_join (profiler->data_writer_thread, NULL);\
+       }\
+} while (0)
 #define CURRENT_THREAD_ID() (gsize) pthread_self ()
 
 #ifndef HAVE_KW_THREAD
@@ -310,10 +664,26 @@ make_pthread_profiler_key (void) {
 #endif
 
 #define EVENT_TYPE sem_t
-#define WRITER_EVENT_INIT() (void) sem_init (&(profiler->statistical_data_writer_event), 0, 0)
-#define WRITER_EVENT_DESTROY() (void) sem_destroy (&(profiler->statistical_data_writer_event))
-#define WRITER_EVENT_WAIT() (void) sem_wait (&(profiler->statistical_data_writer_event))
-#define WRITER_EVENT_RAISE() (void) sem_post (&(profiler->statistical_data_writer_event))
+/*
+ * POSIX flavour of the writer thread events: three semaphores (enable, wake,
+ * done), created with an initial count of zero and pshared set to 0.
+ */
+#define WRITER_EVENT_INIT() do {\
+       sem_init (&(profiler->enable_data_writer_event), 0, 0);\
+       sem_init (&(profiler->wake_data_writer_event), 0, 0);\
+       sem_init (&(profiler->done_data_writer_event), 0, 0);\
+} while (0)
+#define WRITER_EVENT_DESTROY() do {\
+       sem_destroy (&(profiler->enable_data_writer_event));\
+       sem_destroy (&(profiler->wake_data_writer_event));\
+       sem_destroy (&(profiler->done_data_writer_event));\
+} while (0)
+/*
+ * sem_wait () returns -1 with errno set to EINTR when a signal handler
+ * interrupts it, and the handlers installed in this file do not use
+ * SA_RESTART. Retry in that case so that a signal cannot be mistaken for the
+ * event having been raised.
+ */
+#define WRITER_EVENT_SEM_WAIT(event) do {\
+       while ((sem_wait (&(event)) == -1) && (errno == EINTR)) {\
+               /* Interrupted by a signal: wait again. */\
+       }\
+} while (0)
+#define WRITER_EVENT_WAIT() WRITER_EVENT_SEM_WAIT (profiler->wake_data_writer_event)
+#define WRITER_EVENT_RAISE() (void) sem_post (&(profiler->wake_data_writer_event))
+#define WRITER_EVENT_ENABLE_WAIT() WRITER_EVENT_SEM_WAIT (profiler->enable_data_writer_event)
+#define WRITER_EVENT_ENABLE_RAISE() (void) sem_post (&(profiler->enable_data_writer_event))
+/* Waiting for "done" is skipped once the writer thread has terminated. */
+#define WRITER_EVENT_DONE_WAIT() do {\
+       if (CHECK_WRITER_THREAD ()) {\
+               WRITER_EVENT_SEM_WAIT (profiler->done_data_writer_event);\
+       }\
+} while (0)
+#define WRITER_EVENT_DONE_RAISE() (void) sem_post (&(profiler->done_data_writer_event))
 
 #if 0
 #define FILE_HANDLE_TYPE FILE*
@@ -342,7 +712,11 @@ make_pthread_profiler_key (void) {
 #define THREAD_TYPE HANDLE
 #define CREATE_WRITER_THREAD(f) CreateThread (NULL, (1*1024*1024), (f), NULL, 0, NULL);
 #define EXIT_THREAD() ExitThread (0);
-#define WAIT_WRITER_THREAD() WaitForSingleObject (profiler->data_writer_thread, INFINITE)
+/* Waits without timeout on the writer thread handle, but only while CHECK_WRITER_THREAD () says it has not terminated. */
+#define WAIT_WRITER_THREAD() do {\
+       if (CHECK_WRITER_THREAD ()) {\
+                WaitForSingleObject (profiler->data_writer_thread, INFINITE);\
+       }\
+} while (0)
 #define CURRENT_THREAD_ID() (gsize) GetCurrentThreadId ()
 
 #ifndef HAVE_KW_THREAD
@@ -354,10 +728,27 @@ static guint32 profiler_thread_id = -1;
 #endif
 
 #define EVENT_TYPE HANDLE
-#define WRITER_EVENT_INIT() profiler->statistical_data_writer_event = CreateEvent (NULL, FALSE, FALSE, NULL)
+/*
+ * Win32 flavour of the writer thread events: three events (enable, wake,
+ * done) created with bManualReset and bInitialState both FALSE.
+ */
+#define WRITER_EVENT_INIT() do {\
+       profiler->enable_data_writer_event = CreateEvent (NULL, FALSE, FALSE, NULL);\
+       profiler->wake_data_writer_event = CreateEvent (NULL, FALSE, FALSE, NULL);\
+       profiler->done_data_writer_event = CreateEvent (NULL, FALSE, FALSE, NULL);\
+} while (0)
-#define WRITER_EVENT_DESTROY() CloseHandle (profiler->statistical_data_writer_event)
-#define WRITER_EVENT_WAIT() WaitForSingleObject (profiler->statistical_data_writer_event, INFINITE)
-#define WRITER_EVENT_RAISE() SetEvent (profiler->statistical_data_writer_event)
+/* Closes the three handles created by WRITER_EVENT_INIT. */
+#define WRITER_EVENT_DESTROY() do {\
+       CloseHandle (profiler->enable_data_writer_event);\
+       CloseHandle (profiler->wake_data_writer_event);\
+       CloseHandle (profiler->done_data_writer_event);\
+} while (0)
+#define WRITER_EVENT_WAIT() WaitForSingleObject (profiler->wake_data_writer_event, INFINITE)
+#define WRITER_EVENT_RAISE() SetEvent (profiler->wake_data_writer_event)
+#define WRITER_EVENT_ENABLE_WAIT() WaitForSingleObject (profiler->enable_data_writer_event, INFINITE)
+#define WRITER_EVENT_ENABLE_RAISE() SetEvent (profiler->enable_data_writer_event)
+/* Waiting for "done" is skipped once the writer thread has terminated. */
+#define WRITER_EVENT_DONE_WAIT() do {\
+       if (CHECK_WRITER_THREAD ()) {\
+               WaitForSingleObject (profiler->done_data_writer_event, INFINITE);\
+       }\
+} while (0)
+#define WRITER_EVENT_DONE_RAISE() SetEvent (profiler->done_data_writer_event)
 
 #define FILE_HANDLE_TYPE FILE*
 #define OPEN_FILE() profiler->file = fopen (profiler->file_name, "wb");
@@ -394,10 +785,15 @@ typedef struct _ProfilerFileWriteBuffer {
        guint8 buffer [];
 } ProfilerFileWriteBuffer;
 
+/*
+ * Leaves the enclosing function immediately when profiler->profiler_enabled
+ * is false. It expands to a bare `return;`, so it can only be used inside
+ * functions returning void.
+ */
+#define CHECK_PROFILER_ENABLED() do {\
+       if (! profiler->profiler_enabled)\
+               return;\
+} while (0)
 struct _MonoProfiler {
        MUTEX_TYPE mutex;
        
        MonoProfileFlags flags;
+       gboolean profiler_enabled;
        char *file_name;
        char *file_name_suffix;
        FILE_HANDLE_TYPE file;
@@ -407,6 +803,8 @@ struct _MonoProfiler {
        guint64 end_time;
        guint64 end_counter;
        
+       guint64 last_header_counter;
+       
        MethodIdMapping *methods;
        ClassIdMapping *classes;
        
@@ -420,11 +818,17 @@ struct _MonoProfiler {
        ProfilerStatisticalData *statistical_data;
        ProfilerStatisticalData *statistical_data_ready;
        ProfilerStatisticalData *statistical_data_second_buffer;
-       ProfilerUnmanagedFunctions unmanaged_functions;
+       int statistical_call_chain_depth;
+       
        THREAD_TYPE data_writer_thread;
-       EVENT_TYPE statistical_data_writer_event;
+       EVENT_TYPE enable_data_writer_event;
+       EVENT_TYPE wake_data_writer_event;
+       EVENT_TYPE done_data_writer_event;
        gboolean terminate_writer_thread;
+       gboolean writer_thread_terminated;
        gboolean detach_writer_thread;
+       gboolean writer_thread_enabled;
+       gboolean writer_thread_flush_everything;
        
        ProfilerFileWriteBuffer *write_buffers;
        ProfilerFileWriteBuffer *current_write_buffer;
@@ -439,8 +843,10 @@ struct _MonoProfiler {
        int dump_next_heap_snapshots;
        guint64 heap_shot_command_file_access_time;
        gboolean heap_shot_was_signalled;
+       guint32 garbage_collection_counter;
        
        ProfilerExecutableMemoryRegions *executable_regions;
+       ProfilerExecutableFiles executable_files;
        
        struct {
 #if (HAS_OPROFILE)
@@ -448,7 +854,13 @@ struct _MonoProfiler {
 #endif
                gboolean jit_time;
                gboolean unreachable_objects;
+               gboolean collection_summary;
                gboolean heap_shot;
+               gboolean track_stack;
+               gboolean track_calls;
+               gboolean save_allocation_caller;
+               gboolean save_allocation_stack;
+               gboolean allocations_carry_id;
        } action_flags;
 };
 static MonoProfiler *profiler;
@@ -464,6 +876,12 @@ static MonoProfiler *profiler;
 #define SIG_HANDLER_SIGNATURE(ftn) ftn (int _dummy)
 #endif
 
+/*
+ * Flag that a heap snapshot was requested, then force a collection of the
+ * highest generation reported by the GC.
+ */
+static void
+request_heap_snapshot (void)
+{
+       int generation;
+
+       profiler->heap_shot_was_signalled = TRUE;
+       generation = mono_gc_max_generation ();
+       mono_gc_collect (generation);
+}
+
 static void
 SIG_HANDLER_SIGNATURE (gc_request_handler) {
        profiler->heap_shot_was_signalled = TRUE;
@@ -487,6 +905,45 @@ add_gc_request_handler (int signal_number)
        
        g_assert (sigaction (signal_number, &sa, NULL) != -1);
 }
+
+/* Sets profiler->profiler_enabled, the flag tested by CHECK_PROFILER_ENABLED. */
+static void
+enable_profiler (void) {
+       profiler->profiler_enabled = TRUE;
+}
+
+/* Clears profiler->profiler_enabled, the flag tested by CHECK_PROFILER_ENABLED. */
+static void
+disable_profiler (void) {
+       profiler->profiler_enabled = FALSE;
+}
+
+
+
+/* Signal handler: flips profiler->profiler_enabled between TRUE and FALSE. */
+static void
+SIG_HANDLER_SIGNATURE (toggle_handler) {
+       profiler->profiler_enabled = profiler->profiler_enabled ? FALSE : TRUE;
+}
+
+/*
+ * Install toggle_handler as the handler for `signal_number`.
+ * The sigaction () call is made outside g_assert () so that the handler is
+ * still installed in builds where assertions are compiled out; only the check
+ * of its result is an assertion.
+ */
+static void
+add_toggle_handler (int signal_number)
+{
+       struct sigaction sa;
+       int result;
+
+#ifdef MONO_ARCH_USE_SIGACTION
+       sa.sa_sigaction = toggle_handler;
+       sigemptyset (&sa.sa_mask);
+       sa.sa_flags = SA_SIGINFO;
+#else
+       sa.sa_handler = toggle_handler;
+       sigemptyset (&sa.sa_mask);
+       sa.sa_flags = 0;
+#endif
+
+       result = sigaction (signal_number, &sa, NULL);
+       g_assert (result != -1);
+}
 #endif
 
 
@@ -498,7 +955,8 @@ add_gc_request_handler (int signal_number)
 #define DEBUG_CLASS_BITMAPS 0
 #define DEBUG_STATISTICAL_PROFILER 0
 #define DEBUG_WRITER_THREAD 0
-#if (DEBUG_LOGGING_PROFILER || DEBUG_STATISTICAL_PROFILER || DEBUG_HEAP_PROFILER || DEBUG_WRITER_THREAD)
+#define DEBUG_FILE_WRITES 0
+#if (DEBUG_LOGGING_PROFILER || DEBUG_STATISTICAL_PROFILER || DEBUG_HEAP_PROFILER || DEBUG_WRITER_THREAD || DEBUG_FILE_WRITES)
 #define LOG_WRITER_THREAD(m) printf ("WRITER-THREAD-LOG %s\n", m)
 #else
 #define LOG_WRITER_THREAD(m)
@@ -509,6 +967,166 @@ static int event_counter = 0;
 #define EVENT_MARK() printf ("[EVENT:%d]", ++ event_counter)
 #endif
 
+/*
+ * Put `stack` in the "no storage" state: all three arrays NULL and every
+ * counter zero. Nothing is allocated or freed here.
+ */
+static void
+thread_stack_initialize_empty (ProfilerThreadStack *stack) {
+       stack->stack = NULL;
+       stack->method_is_jitted = NULL;
+       stack->written_frames = NULL;
+
+       stack->capacity = 0;
+       stack->top = 0;
+       stack->last_saved_top = 0;
+       stack->last_written_frame = 0;
+}
+
+/*
+ * Release the three arrays owned by `stack` and reset every field, leaving
+ * the structure in the state thread_stack_initialize_empty () produces.
+ */
+static void
+thread_stack_free (ProfilerThreadStack *stack) {
+       /* g_free () ignores NULL, so no per-pointer check is needed. */
+       g_free (stack->stack);
+       g_free (stack->method_is_jitted);
+       g_free (stack->written_frames);
+
+       stack->stack = NULL;
+       stack->method_is_jitted = NULL;
+       stack->written_frames = NULL;
+
+       stack->capacity = 0;
+       stack->top = 0;
+       stack->last_saved_top = 0;
+       stack->last_written_frame = 0;
+}
+
+/*
+ * Give `stack` fresh zero-filled storage for `capacity` frames and reset its
+ * counters. Any arrays the structure pointed to before are overwritten, not
+ * freed: the caller must have saved or released them.
+ */
+static void
+thread_stack_initialize (ProfilerThreadStack *stack, guint32 capacity) {
+       stack->stack = g_new0 (MonoMethod*, capacity);
+       stack->method_is_jitted = g_new0 (guint8, capacity);
+       stack->written_frames = g_new0 (guint32, capacity);
+
+       stack->capacity = capacity;
+       stack->top = 0;
+       stack->last_saved_top = 0;
+       stack->last_written_frame = 0;
+}
+
+/*
+ * Push `method` and its jitted flag on `stack`.
+ * When the arrays are full they are replaced by zero-filled ones of twice the
+ * capacity; the existing frames and the three counters are carried over.
+ * A stack whose capacity is 0 gets an initial capacity instead of being
+ * "doubled" to 0.
+ */
+static void
+thread_stack_push_jitted (ProfilerThreadStack *stack, MonoMethod* method, gboolean method_is_jitted) {
+       if (stack->top >= stack->capacity) {
+               MonoMethod **old_stack = stack->stack;
+               guint8 *old_method_is_jitted = stack->method_is_jitted;
+               guint32 *old_written_frames = stack->written_frames;
+               guint32 top = stack->top;
+               guint32 last_saved_top = stack->last_saved_top;
+               guint32 last_written_frame = stack->last_written_frame;
+               /* Doubling 0 would allocate nothing, and the store below would go through NULL. */
+               guint32 new_capacity = (stack->capacity > 0) ? (stack->capacity * 2) : 16;
+               thread_stack_initialize (stack, new_capacity);
+               /* With no frames the old arrays may be NULL, which memcpy () must not be given. */
+               if (top > 0) {
+                       memcpy (stack->stack, old_stack, top * sizeof (MonoMethod*));
+                       memcpy (stack->method_is_jitted, old_method_is_jitted, top * sizeof (guint8));
+                       memcpy (stack->written_frames, old_written_frames, top * sizeof (guint32));
+               }
+               g_free (old_stack);
+               g_free (old_method_is_jitted);
+               g_free (old_written_frames);
+               /* thread_stack_initialize () zeroed the counters: restore them. */
+               stack->top = top;
+               stack->last_saved_top = last_saved_top;
+               stack->last_written_frame = last_written_frame;
+       }
+       stack->stack [stack->top] = method;
+       stack->method_is_jitted [stack->top] = method_is_jitted;
+       stack->top ++;
+}
+
+/* Pushes `method` with its jitted flag set to FALSE. */
+static inline void
+thread_stack_push (ProfilerThreadStack *stack, MonoMethod* method) {
+       thread_stack_push_jitted (stack, method, FALSE);
+}
+
+/*
+ * Remove and return the most recently pushed method, or return NULL when the
+ * stack is empty. last_saved_top is lowered so that it never exceeds the new
+ * top.
+ */
+static MonoMethod*
+thread_stack_pop (ProfilerThreadStack *stack) {
+       guint32 new_top;
+
+       if (stack->top == 0) {
+               return NULL;
+       }
+       new_top = stack->top - 1;
+       stack->top = new_top;
+       if (stack->last_saved_top > new_top) {
+               stack->last_saved_top = new_top;
+       }
+       return stack->stack [new_top];
+}
+
+/* Returns the most recently pushed method without removing it, or NULL when the stack is empty. */
+static MonoMethod*
+thread_stack_top (ProfilerThreadStack *stack) {
+       return (stack->top > 0) ? stack->stack [stack->top - 1] : NULL;
+}
+
+/* Returns the jitted flag stored with the most recently pushed frame, or FALSE when the stack is empty. */
+static gboolean
+thread_stack_top_is_jitted (ProfilerThreadStack *stack) {
+       if (stack->top == 0) {
+               return FALSE;
+       }
+       return stack->method_is_jitted [stack->top - 1];
+}
+
+/*
+ * Returns the method `index` frames below the top (index 0 is the most
+ * recently pushed frame), or NULL when the stack does not hold more than
+ * `index` frames.
+ */
+static MonoMethod*
+thread_stack_index_from_top (ProfilerThreadStack *stack, int index) {
+       if (stack->top <= index) {
+               return NULL;
+       }
+       return stack->stack [stack->top - (index + 1)];
+}
+
+/*
+ * Returns the jitted flag of the frame `index` positions below the top
+ * (index 0 is the most recently pushed frame), or FALSE when the stack does
+ * not hold more than `index` frames.
+ */
+static gboolean
+thread_stack_index_from_top_is_jitted (ProfilerThreadStack *stack, int index) {
+       if (stack->top <= index) {
+               return FALSE;
+       }
+       return stack->method_is_jitted [stack->top - (index + 1)];
+}
+
+/* Like thread_stack_push (), but does nothing when `stack` has no storage (its stack array is NULL). */
+static inline void
+thread_stack_push_safely (ProfilerThreadStack *stack, MonoMethod* method) {
+       if (stack->stack == NULL) {
+               return;
+       }
+       thread_stack_push (stack, method);
+}
+
+/* Like thread_stack_push_jitted (), but does nothing when `stack` has no storage (its stack array is NULL). */
+static inline void
+thread_stack_push_jitted_safely (ProfilerThreadStack *stack, MonoMethod* method, gboolean method_is_jitted) {
+       if (stack->stack == NULL) {
+               return;
+       }
+       thread_stack_push_jitted (stack, method, method_is_jitted);
+}
+
+/* Returns how many frames lie above last_saved_top, never less than zero. */
+static inline int
+thread_stack_count_unsaved_frames (ProfilerThreadStack *stack) {
+       int unsaved = stack->top - stack->last_saved_top;
+
+       if (unsaved > 0) {
+               return unsaved;
+       }
+       return 0;
+}
+
+/* Returns stack->last_written_frame (a guint32) converted to int. */
+static inline int
+thread_stack_get_last_written_frame (ProfilerThreadStack *stack) {
+       return stack->last_written_frame;
+}
+
+/* Stores `last_written_frame` into the stack's field of the same name. */
+static inline void
+thread_stack_set_last_written_frame (ProfilerThreadStack *stack, int last_written_frame) {
+       stack->last_written_frame = last_written_frame;
+}
+
+/* Returns written_frames [index]. No bounds check: the caller must pass an index below the stack capacity. */
+static inline guint32
+thread_stack_written_frame_at_index (ProfilerThreadStack *stack, int index) {
+       return stack->written_frames [index];
+}
+
+/* Stores `method_id_and_is_jitted` in written_frames [index]. No bounds check: the caller must pass an index below the stack capacity. */
+static inline void
+thread_stack_write_frame_at_index (ProfilerThreadStack *stack, int index, guint32 method_id_and_is_jitted) {
+       stack->written_frames [index] = method_id_and_is_jitted;
+}
 
 static ClassIdMappingElement*
 class_id_mapping_element_get (MonoClass *klass) {
@@ -530,7 +1148,7 @@ static ClassIdMappingElement*
 class_id_mapping_element_new (MonoClass *klass) {
        ClassIdMappingElement *result = g_new (ClassIdMappingElement, 1);
        
-       result->name = g_strdup_printf ("%s.%s", mono_class_get_namespace (klass), mono_class_get_name (klass));
+       result->name = mono_type_full_name (mono_class_get_type (klass));
        result->klass = klass;
        result->next_unwritten = profiler->classes->unwritten;
        profiler->classes->unwritten = result;
@@ -870,72 +1488,6 @@ class_id_mapping_destroy (ClassIdMapping *map) {
        g_free (map);
 }
 
-static void
-unmanaged_function_new (ProfilerUnmanagedFunctions *functions, Dl_info *dl_info) {
-       ProfilerUnmanagedFunction *function = g_new (ProfilerUnmanagedFunction, 1);
-       function->id = functions->next_id;
-       functions->next_id ++;
-       function->hits = 1;
-       function->next_unwritten = functions->unwritten_queue;
-       functions->unwritten_queue = function;
-       function->name = g_strdup_printf ("[%s]:%s", dl_info->dli_fname, dl_info->dli_sname);
-       g_hash_table_insert (functions->table, dl_info->dli_saddr, function);
-}
-
-static void
-unmanaged_function_destroy (gpointer element) {
-       ProfilerUnmanagedFunction *function = (ProfilerUnmanagedFunction*) element;
-       if (function->name) {
-               g_free (function->name);
-               function->name = NULL;
-       }
-       g_free (function);
-}
-
-static gboolean
-unmanaged_function_hit (ProfilerUnmanagedFunctions *functions, gpointer address) {
-       Dl_info dl_info;
-       if (dladdr (address, &dl_info) && (dl_info.dli_saddr != NULL) && (dl_info.dli_fname != NULL)) {
-               ProfilerUnmanagedFunction *function = g_hash_table_lookup (functions->table, dl_info.dli_saddr);
-               
-               if (function != NULL) {
-                       if (function->next_unwritten != NULL) {
-                               function->hits ++;
-                       } else {
-                               function->hits = 1;
-                               function->next_unwritten = functions->unwritten_queue;
-                               functions->unwritten_queue = function;
-                       }
-               } else {
-                       unmanaged_function_new (functions, &dl_info);
-               }
-               
-               return TRUE;
-       } else {
-               return FALSE;
-       }
-}
-
-static void
-unmanaged_functions_init (ProfilerUnmanagedFunctions *functions) {
-       functions->next_id = 1;
-       functions->table = g_hash_table_new_full (g_direct_hash, NULL, NULL, unmanaged_function_destroy);
-       functions->unwritten_queue_end = &(functions->actual_unwritten_queue_end);
-       functions->unwritten_queue = functions->unwritten_queue_end;
-       functions->actual_unwritten_queue_end.hits = 0;
-       functions->actual_unwritten_queue_end.id = 0;
-       functions->actual_unwritten_queue_end.name = NULL;
-       functions->actual_unwritten_queue_end.next_unwritten = NULL;
-}
-
-static void
-unmanaged_functions_dispose (ProfilerUnmanagedFunctions *functions) {
-       functions->next_id = 0;
-       g_hash_table_destroy (functions->table);
-       functions->table = NULL;
-       functions->unwritten_queue = NULL;
-}
-
 #if (DEBUG_LOAD_EVENTS)
 static void
 print_load_event (const char *event_name, GHashTable *table, gpointer item, LoadedElement *element);
@@ -1062,24 +1614,49 @@ profiler_heap_shot_object_buffer_new (ProfilerPerThreadData *data) {
 }
 
 static ProfilerHeapShotWriteJob*
-profiler_heap_shot_write_job_new (gboolean heap_shot_was_signalled) {
+profiler_heap_shot_write_job_new (gboolean heap_shot_was_signalled, gboolean dump_heap_data, guint32 collection) { /* Allocates a write job; the caller receives ownership of the returned pointer. */
        ProfilerHeapShotWriteJob *job = g_new (ProfilerHeapShotWriteJob, 1);
        job->next = NULL;
        job->next_unwritten = NULL;
-       job->buffers = g_new (ProfilerHeapShotWriteBuffer, 1);
-       job->buffers->next = NULL;
-       job->last_next = & (job->buffers->next);
-       job->start = & (job->buffers->buffer [0]);
-       job->cursor = job->start;
-       job->end = & (job->buffers->buffer [PROFILER_HEAP_SHOT_WRITE_BUFFER_SIZE]);
+       
+       if (profiler->action_flags.unreachable_objects || dump_heap_data) { /* An object buffer is only allocated when unreachable objects are enabled or this job dumps heap data. */
+               job->buffers = g_new (ProfilerHeapShotWriteBuffer, 1);
+               job->buffers->next = NULL;
+               job->last_next = & (job->buffers->next);
+               job->start = & (job->buffers->buffer [0]);
+               job->cursor = job->start;
+               job->end = & (job->buffers->buffer [PROFILER_HEAP_SHOT_WRITE_BUFFER_SIZE]);
+       } else { /* Otherwise every buffer-related pointer is left NULL. */
+               job->buffers = NULL;
+               job->last_next = NULL;
+               job->start = NULL;
+               job->cursor = NULL;
+               job->end = NULL;
+       }
        job->full_buffers = 0;
+       
+       if (profiler->action_flags.collection_summary) { /* The per-class summary array is zero-initialized and sized from profiler->classes->next_id. */
+               job->summary.capacity = profiler->classes->next_id;
+               job->summary.per_class_data = g_new0 (ProfilerHeapShotClassSummary, job->summary.capacity);
+       } else {
+               job->summary.capacity = 0;
+               job->summary.per_class_data = NULL;
+       }
+
        job->heap_shot_was_signalled = heap_shot_was_signalled;
+       job->collection = collection;
+       job->dump_heap_data = dump_heap_data;
 #if DEBUG_HEAP_PROFILER
-       printf ("profiler_heap_shot_write_job_new: created job %p with buffer %p(%p-%p)\n", job, job->buffers, job->start, job->end);
+       printf ("profiler_heap_shot_write_job_new: created job %p with buffer %p(%p-%p) (collection %d, dump %d)\n", job, job->buffers, job->start, job->end, collection, dump_heap_data);
 #endif
        return job;
 }
 
+/*
+ * Tell whether a write job holds anything worth writing: either it owns
+ * object buffers or its per-class summary has a non-zero capacity.
+ */
+static gboolean
+profiler_heap_shot_write_job_has_data (ProfilerHeapShotWriteJob *job) {
+       if (job->buffers != NULL) {
+               return TRUE;
+       }
+       return (job->summary.capacity > 0);
+}
+
 static void
 profiler_heap_shot_write_job_add_buffer (ProfilerHeapShotWriteJob *job, gpointer value) {
        ProfilerHeapShotWriteBuffer *buffer = g_new (ProfilerHeapShotWriteBuffer, 1);
@@ -1116,6 +1693,12 @@ profiler_heap_shot_write_job_free_buffers (ProfilerHeapShotWriteJob *job) {
        }
        
        job->buffers = NULL;
+       
+       if (job->summary.per_class_data != NULL) {
+               g_free (job->summary.per_class_data);
+               job->summary.per_class_data = NULL;
+       }
+       job->summary.capacity = 0;
 }
 
 static void
@@ -1135,15 +1718,15 @@ profiler_process_heap_shot_write_jobs (void) {
                        next_job = current_job->next_unwritten;
                        
                        if (next_job != NULL) {
-                               if (current_job->buffers != NULL) {
+                               if (profiler_heap_shot_write_job_has_data (current_job)) {
                                        done = FALSE;
                                }
-                               if (next_job->buffers == NULL) {
+                               if (! profiler_heap_shot_write_job_has_data (next_job)) {
                                        current_job->next_unwritten = NULL;
                                        next_job = NULL;
                                }
                        } else {
-                               if (current_job->buffers != NULL) {
+                               if (profiler_heap_shot_write_job_has_data (current_job)) {
                                        LOG_WRITER_THREAD ("profiler_process_heap_shot_write_jobs: writing...");
                                        profiler_heap_shot_write_block (current_job);
                                        LOG_WRITER_THREAD ("profiler_process_heap_shot_write_jobs: done");
@@ -1181,6 +1764,7 @@ profiler_free_heap_shot_write_jobs (void) {
                        printf ("profiler_free_heap_shot_write_jobs: job %p will be freed\n", current_job);
 #endif
                        next_job = current_job->next;
+                       profiler_heap_shot_write_job_free_buffers (current_job);
                        g_free (current_job);
                        current_job = next_job;
                }
@@ -1244,9 +1828,16 @@ profiler_per_thread_data_new (guint32 buffer_size)
        data->last_event_counter = data->start_event_counter;
        data->thread_id = CURRENT_THREAD_ID ();
        data->heap_shot_object_buffers = NULL;
-       if ((profiler->action_flags.unreachable_objects == TRUE) || (profiler->action_flags.heap_shot == TRUE)) {
+       if ((profiler->action_flags.unreachable_objects == TRUE) ||
+                       (profiler->action_flags.heap_shot == TRUE) ||
+                       (profiler->action_flags.collection_summary == TRUE)) {
                profiler_heap_shot_object_buffer_new (data);
        }
+       if (profiler->action_flags.track_stack) {
+               thread_stack_initialize (&(data->stack), 64);
+       } else {
+               thread_stack_initialize_empty (&(data->stack));
+       }
        return data;
 }
 
@@ -1254,17 +1845,18 @@ static void
 profiler_per_thread_data_destroy (ProfilerPerThreadData *data) {
        g_free (data->events);
        profiler_heap_shot_object_buffers_destroy (data->heap_shot_object_buffers);
+       thread_stack_free (&(data->stack));
        g_free (data);
 }
 
 static ProfilerStatisticalData*
-profiler_statistical_data_new (guint32 buffer_size)
-{
+profiler_statistical_data_new (MonoProfiler *profiler) {
+       int buffer_size = profiler->statistical_buffer_size * (profiler->statistical_call_chain_depth + 1);
        ProfilerStatisticalData *data = g_new (ProfilerStatisticalData, 1);
 
-       data->addresses = g_new0 (gpointer, buffer_size);
+       data->hits = g_new0 (ProfilerStatisticalHit, buffer_size);
        data->next_free_index = 0;
-       data->end_index = buffer_size;
+       data->end_index = profiler->statistical_buffer_size;
        data->first_unwritten_index = 0;
        
        return data;
@@ -1272,7 +1864,7 @@ profiler_statistical_data_new (guint32 buffer_size)
 
 static void
 profiler_statistical_data_destroy (ProfilerStatisticalData *data) {
-       g_free (data->addresses);
+       g_free (data->hits); /* Release the hits array first, then the structure that owns it. */
        g_free (data);
 }
 
@@ -1316,7 +1908,17 @@ static void
 write_current_block (guint16 code) {
        guint32 size = (profiler->full_write_buffers * PROFILER_FILE_WRITE_BUFFER_SIZE) + profiler->current_write_position;
        ProfilerFileWriteBuffer *current_buffer = profiler->write_buffers;
-       guint8 header [6];
+       guint64 current_counter;
+       guint32 counter_delta;
+       guint8 header [10];
+       
+       MONO_PROFILER_GET_CURRENT_COUNTER (current_counter);
+       if (profiler->last_header_counter != 0) {
+               counter_delta = current_counter - profiler->last_header_counter;
+       } else {
+               counter_delta = 0;
+       }
+       profiler->last_header_counter = current_counter;
        
        header [0] = code & 0xff;
        header [1] = (code >> 8) & 0xff;
@@ -1324,18 +1926,34 @@ write_current_block (guint16 code) {
        header [3] = (size >> 8) & 0xff;
        header [4] = (size >> 16) & 0xff;
        header [5] = (size >> 24) & 0xff;
+       header [6] = counter_delta & 0xff;
+       header [7] = (counter_delta >> 8) & 0xff;
+       header [8] = (counter_delta >> 16) & 0xff;
+       header [9] = (counter_delta >> 24) & 0xff;
        
-       WRITE_BUFFER (& (header [0]), 6);
+#if (DEBUG_FILE_WRITES)
+       printf ("write_current_block: writing header (code %d)\n", code);
+#endif
+       WRITE_BUFFER (& (header [0]), 10);
        
        while ((current_buffer != NULL) && (profiler->full_write_buffers > 0)) {
+#if (DEBUG_FILE_WRITES)
+               printf ("write_current_block: writing buffer (size %d)\n", PROFILER_FILE_WRITE_BUFFER_SIZE);
+#endif
                WRITE_BUFFER (& (current_buffer->buffer [0]), PROFILER_FILE_WRITE_BUFFER_SIZE);
                profiler->full_write_buffers --;
                current_buffer = current_buffer->next;
        }
        if (profiler->current_write_position > 0) {
+#if (DEBUG_FILE_WRITES)
+               printf ("write_current_block: writing last buffer (size %d)\n", profiler->current_write_position);
+#endif
                WRITE_BUFFER (& (current_buffer->buffer [0]), profiler->current_write_position);
        }
        FLUSH_FILE ();
+#if (DEBUG_FILE_WRITES)
+       printf ("write_current_block: buffers flushed\n");
+#endif
        
        profiler->current_write_buffer = profiler->write_buffers;
        profiler->current_write_position = 0;
@@ -1371,6 +1989,28 @@ write_string (const char *string) {
        WRITE_BYTE (0);
 }
 
+static void write_clock_data (void);
+/*
+ * Write a DIRECTIVES block: clock data, then (only when "start" is set) one
+ * directive code for each allocation-related option that is enabled, then
+ * the END directive, then clock data again.
+ */
+static void
+write_directives_block (gboolean start) {
+       write_clock_data ();
+       
+       if (start && profiler->action_flags.save_allocation_caller) {
+               write_uint32 (MONO_PROFILER_DIRECTIVE_ALLOCATIONS_CARRY_CALLER);
+       }
+       if (start && (profiler->action_flags.save_allocation_stack || profiler->action_flags.track_calls)) {
+               write_uint32 (MONO_PROFILER_DIRECTIVE_ALLOCATIONS_HAVE_STACK);
+       }
+       if (start && profiler->action_flags.allocations_carry_id) {
+               write_uint32 (MONO_PROFILER_DIRECTIVE_ALLOCATIONS_CARRY_ID);
+       }
+       /* The END directive is always written, even when "start" is FALSE. */
+       write_uint32 (MONO_PROFILER_DIRECTIVE_END);
+       
+       write_clock_data ();
+       write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_DIRECTIVES);
+}
+
 #if DEBUG_HEAP_PROFILER
 #define WRITE_HEAP_SHOT_JOB_VALUE_MESSAGE(v,c) printf ("WRITE_HEAP_SHOT_JOB_VALUE: writing value %p at cursor %p\n", (v), (c))
 #else
@@ -1386,10 +2026,18 @@ write_string (const char *string) {
        }\
 } while (0)
 
+
 #undef GUINT_TO_POINTER
-#define GUINT_TO_POINTER(u) ((void*)(guint64)(u))
 #undef GPOINTER_TO_UINT
+#if (SIZEOF_VOID_P == 4)
+#define GUINT_TO_POINTER(u) ((void*)(guint32)(u))
+#define GPOINTER_TO_UINT(p) ((guint32)(void*)(p))
+#elif (SIZEOF_VOID_P == 8)
+#define GUINT_TO_POINTER(u) ((void*)(guint64)(u))
 #define GPOINTER_TO_UINT(p) ((guint64)(void*)(p))
+#else
+#error Bad size of void pointer
+#endif
 
 #define WRITE_HEAP_SHOT_JOB_VALUE_WITH_CODE(j,v,c) WRITE_HEAP_SHOT_JOB_VALUE (j, GUINT_TO_POINTER (GPOINTER_TO_UINT (v)|(c)))
 
@@ -1417,7 +2065,7 @@ write_string (const char *string) {
 } while (0)
 
 static void
-profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
+profiler_heap_shot_write_data_block (ProfilerHeapShotWriteJob *job) {
        ProfilerHeapShotWriteBuffer *buffer;
        gpointer* cursor;
        gpointer* end;
@@ -1430,13 +2078,13 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
        write_uint64 (job->start_time);
        write_uint64 (job->end_counter);
        write_uint64 (job->end_time);
-       
+       write_uint32 (job->collection);
        MONO_PROFILER_GET_CURRENT_COUNTER (start_counter);
        MONO_PROFILER_GET_CURRENT_TIME (start_time);
        write_uint64 (start_counter);
        write_uint64 (start_time);
 #if DEBUG_HEAP_PROFILER
-       printf ("profiler_heap_shot_write_block: working on job %p...\n", job);
+       printf ("profiler_heap_shot_write_data_block: start writing job %p (start %p, end %p)...\n", job, & (job->buffers->buffer [0]), job->cursor);
 #endif
        buffer = job->buffers;
        cursor = & (buffer->buffer [0]);
@@ -1449,13 +2097,13 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
                cursor = NULL;
        }
 #if DEBUG_HEAP_PROFILER
-       printf ("profiler_heap_shot_write_block: in job %p, starting at buffer %p and cursor %p\n", job, buffer, cursor);
+       printf ("profiler_heap_shot_write_data_block: in job %p, starting at buffer %p and cursor %p\n", job, buffer, cursor);
 #endif
        while (cursor != NULL) {
                gpointer value = *cursor;
                HeapProfilerJobValueCode code = GPOINTER_TO_UINT (value) & HEAP_CODE_MASK;
 #if DEBUG_HEAP_PROFILER
-               printf ("profiler_heap_shot_write_block: got value %p and code %d\n", value, code);
+               printf ("profiler_heap_shot_write_data_block: got value %p and code %d\n", value, code);
 #endif
                
                UPDATE_JOB_BUFFER_CURSOR ();
@@ -1467,7 +2115,7 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
                        
                        class_id = class_id_mapping_element_get (klass);
                        if (class_id == NULL) {
-                               printf ("profiler_heap_shot_write_block: unknown class %p", klass);
+                               printf ("profiler_heap_shot_write_data_block: unknown class %p", klass);
                        }
                        g_assert (class_id != NULL);
                        write_uint32 ((class_id->id << 2) | HEAP_CODE_FREE_OBJECT_CLASS);
@@ -1476,7 +2124,7 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
                        UPDATE_JOB_BUFFER_CURSOR ();
                        write_uint32 (size);
 #if DEBUG_HEAP_PROFILER
-                       printf ("profiler_heap_shot_write_block: wrote unreachable object of class %p (id %d, size %d)\n", klass, class_id->id, size);
+                       printf ("profiler_heap_shot_write_data_block: wrote unreachable object of class %p (id %d, size %d)\n", klass, class_id->id, size);
 #endif
                } else if (code == HEAP_CODE_OBJECT) {
                        MonoObject *object = GUINT_TO_POINTER (GPOINTER_TO_UINT (value) & (~ (guint64) HEAP_CODE_MASK));
@@ -1487,7 +2135,7 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
                        UPDATE_JOB_BUFFER_CURSOR ();
                        
                        if (class_id == NULL) {
-                               printf ("profiler_heap_shot_write_block: unknown class %p", klass);
+                               printf ("profiler_heap_shot_write_data_block: unknown class %p", klass);
                        }
                        g_assert (class_id != NULL);
                        
@@ -1496,7 +2144,7 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
                        write_uint32 (size);
                        write_uint32 (references);
 #if DEBUG_HEAP_PROFILER
-                       printf ("profiler_heap_shot_write_block: writing object %p (references %d)\n", value, references);
+                       printf ("profiler_heap_shot_write_data_block: writing object %p (references %d)\n", value, references);
 #endif
                        
                        while (references > 0) {
@@ -1505,12 +2153,12 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
                                UPDATE_JOB_BUFFER_CURSOR ();
                                references --;
 #if DEBUG_HEAP_PROFILER
-                               printf ("profiler_heap_shot_write_block:   inside object %p, wrote reference %p)\n", value, reference);
+                               printf ("profiler_heap_shot_write_data_block:   inside object %p, wrote reference %p)\n", value, reference);
 #endif
                        }
                } else {
 #if DEBUG_HEAP_PROFILER
-                       printf ("profiler_heap_shot_write_block: unknown code %d in value %p\n", code, value);
+                       printf ("profiler_heap_shot_write_data_block: unknown code %d in value %p\n", code, value);
 #endif
                        g_assert_not_reached ();
                }
@@ -1522,21 +2170,78 @@ profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
        write_uint64 (end_counter);
        write_uint64 (end_time);
        
-       write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_HEAP);
-       
-       profiler_heap_shot_write_job_free_buffers (job);
+       write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_HEAP_DATA);
 #if DEBUG_HEAP_PROFILER
-       printf ("profiler_heap_shot_write_block: work on job %p done.\n", job);
+       printf ("profiler_heap_shot_write_data_block: writing job %p done.\n", job);
 #endif
 }
-
 static void
-write_element_load_block (LoadedElement *element, guint8 kind, gsize thread_id) {
-       WRITE_BYTE (kind);
-       write_uint64 (element->load_start_counter);
-       write_uint64 (element->load_end_counter);
-       write_uint64 (thread_id);
-       write_string (element->name);
+profiler_heap_shot_write_summary_block (ProfilerHeapShotWriteJob *job) { /* Writes one HEAP_SUMMARY block holding the per-class counts stored in job->summary. */
+       guint64 start_counter;
+       guint64 start_time;
+       guint64 end_counter;
+       guint64 end_time;
+       int id;
+       
+#if DEBUG_HEAP_PROFILER
+       printf ("profiler_heap_shot_write_summary_block: start writing job %p...\n", job);
+#endif
+       MONO_PROFILER_GET_CURRENT_COUNTER (start_counter);
+       MONO_PROFILER_GET_CURRENT_TIME (start_time);
+       write_uint64 (start_counter);
+       write_uint64 (start_time);
+       
+       write_uint32 (job->collection);
+       
+       for (id = 0; id < job->summary.capacity; id ++) {
+               if ((job->summary.per_class_data [id].reachable.instances > 0) || (job->summary.per_class_data [id].unreachable.instances > 0)) { /* Entries with no reachable and no unreachable instances are skipped. */
+                       write_uint32 (id);
+                       write_uint32 (job->summary.per_class_data [id].reachable.instances);
+                       write_uint32 (job->summary.per_class_data [id].reachable.bytes);
+                       write_uint32 (job->summary.per_class_data [id].unreachable.instances);
+                       write_uint32 (job->summary.per_class_data [id].unreachable.bytes);
+               }
+       }
+       write_uint32 (0); /* NOTE(review): presumably a zero id marks the end of the record list -- confirm against the reader. */
+       
+       MONO_PROFILER_GET_CURRENT_COUNTER (end_counter);
+       MONO_PROFILER_GET_CURRENT_TIME (end_time);
+       write_uint64 (end_counter);
+       write_uint64 (end_time);
+       
+       write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_HEAP_SUMMARY);
+#if DEBUG_HEAP_PROFILER
+       printf ("profiler_heap_shot_write_summary_block: writing job %p done.\n", job);
+#endif
+}
+
+/*
+ * Write out what a heap shot job has collected, then release its buffers
+ * through profiler_heap_shot_write_job_free_buffers.
+ *
+ * job: the write job to flush.
+ *
+ * The summary block is written when collection summaries are enabled.
+ * The data block is written when unreachable objects or heap shots are
+ * enabled, but only if the job owns object buffers: a job can be created
+ * with job->buffers == NULL (when neither unreachable_objects nor
+ * dump_heap_data was set at creation time), and
+ * profiler_heap_shot_write_data_block uses job->buffers without checking
+ * it for NULL.
+ */
+static void
+profiler_heap_shot_write_block (ProfilerHeapShotWriteJob *job) {
+#if DEBUG_HEAP_PROFILER
+       printf ("profiler_heap_shot_write_block: working on job %p...\n", job);
+#endif
+       
+       if (profiler->action_flags.collection_summary == TRUE) {
+               profiler_heap_shot_write_summary_block (job);
+       }
+       
+       /* Skip the data block for jobs that never allocated object buffers. */
+       if ((job->buffers != NULL) &&
+                       ((profiler->action_flags.unreachable_objects == TRUE) || (profiler->action_flags.heap_shot == TRUE))) {
+               profiler_heap_shot_write_data_block (job);
+       }
+       
+       profiler_heap_shot_write_job_free_buffers (job);
+#if DEBUG_HEAP_PROFILER
+       printf ("profiler_heap_shot_write_block: work on job %p done.\n", job);
+#endif
+}
+
+static void
+write_element_load_block (LoadedElement *element, guint8 kind, gsize thread_id) { /* Writes one LOADED block for "element" and then marks the element's load as written. */
+       WRITE_BYTE (kind);
+       write_uint64 (element->load_start_counter);
+       write_uint64 (element->load_end_counter);
+       write_uint64 (thread_id);
+       write_string (element->name);
        write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_LOADED);
        element->load_written = TRUE;
 }
@@ -1572,7 +2277,7 @@ write_mapping_block (gsize thread_id) {
        if ((profiler->classes->unwritten == NULL) && (profiler->methods->unwritten == NULL))
                return;
        
-#if (DEBUG_MAPPING_EVENTS)
+#if (DEBUG_MAPPING_EVENTS || DEBUG_FILE_WRITES)
        printf ("[write_mapping_block][TID %ld] START\n", thread_id);
 #endif
        
@@ -1611,18 +2316,11 @@ write_mapping_block (gsize thread_id) {
        write_clock_data ();
        write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_MAPPING);
        
-#if (DEBUG_MAPPING_EVENTS)
+#if (DEBUG_MAPPING_EVENTS || DEBUG_FILE_WRITES)
        printf ("[write_mapping_block][TID %ld] END\n", thread_id);
 #endif
 }
 
-static guint64
-get_extended_event_value (ProfilerEventData *event, ProfilerEventData *next) {
-       guint64 result = next->data.number;
-       result |= (((guint64) event->value) << 32);
-       return result;
-}
-
 typedef enum {
        MONO_PROFILER_PACKED_EVENT_CODE_METHOD_ENTER = 1,
        MONO_PROFILER_PACKED_EVENT_CODE_METHOD_EXIT_IMPLICIT = 2,
@@ -1644,17 +2342,78 @@ typedef enum {
        result = ((base)|((((kind)<<4) | (code)) << MONO_PROFILER_PACKED_EVENT_CODE_BITS));\
 } while (0)
 
+/*
+ * Emit a stack section event whose first value is 0 and whose frame count
+ * is the stack's last written frame, followed by every entry of the
+ * written frames array from the highest written index down to index 0.
+ */
+static void
+rewrite_last_written_stack (ProfilerThreadStack *stack) {
+       int frame_count = thread_stack_get_last_written_frame (stack);
+       int remaining;
+       guint8 event_code;
+       
+       MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, MONO_PROFILER_EVENT_STACK_SECTION, 0, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+       WRITE_BYTE (event_code);
+       write_uint32 (0);
+       write_uint32 (frame_count);
+       
+       /* Walk downwards: "remaining" is always one past the index written. */
+       for (remaining = frame_count; remaining > 0; remaining--) {
+               write_uint32 (thread_stack_written_frame_at_index (stack, remaining - 1));
+       }
+}
+
+
 static ProfilerEventData*
-write_event (ProfilerEventData *event) {
+write_stack_section_event (ProfilerEventData *events, ProfilerPerThreadData *data) { /* Writes a stack section header plus the saved_frames caller events that follow it; returns the first event after them. */
+       int last_saved_frame = events->data.number;
+       int saved_frames = events->value;
+       guint8 event_code;
+       int i;
+       
+       MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, MONO_PROFILER_EVENT_STACK_SECTION, 0, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+       WRITE_BYTE (event_code);
+       write_uint32 (last_saved_frame);
+       write_uint32 (saved_frames);
+       thread_stack_set_last_written_frame (&(data->stack), last_saved_frame + saved_frames); /* The thread's last written frame becomes last_saved_frame + saved_frames. */
+       events++; /* Step past the header event to the first frame event. */
+       
+       for (i = 0; i < saved_frames; i++) {
+               guint8 code = events->code;
+               guint32 jit_flag;
+               MethodIdMappingElement *method;
+               guint32 frame_value;
+               
+               if (code == MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER) {
+                       jit_flag = 0;
+               } else if (code == MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER) {
+                       jit_flag = 1;
+               } else { /* Any other event code inside a stack section is treated as a fatal inconsistency. */
+                       g_assert_not_reached ();
+                       jit_flag = 0;
+               }
+               
+               method = method_id_mapping_element_get (events->data.address);
+               g_assert (method != NULL);
+               frame_value = (method->id << 1) | jit_flag; /* Method id in the upper bits, JIT flag in bit 0. */
+               write_uint32 (frame_value);
+               thread_stack_write_frame_at_index (&(data->stack), last_saved_frame + saved_frames - (1 + i), frame_value); /* Event i is stored at the mirrored index, so the first event lands at the highest index. */
+               events ++;
+       }
+       
+       return events;
+}
+
+static ProfilerEventData*
+write_event (ProfilerEventData *event, ProfilerPerThreadData *data) {
        ProfilerEventData *next = event + 1;
        gboolean write_event_value = TRUE;
        guint8 event_code;
        guint64 event_data;
        guint64 event_value;
+       gboolean write_event_value_extension_1 = FALSE;
+       guint64 event_value_extension_1 = 0;
+       gboolean write_event_value_extension_2 = FALSE;
+       guint64 event_value_extension_2 = 0;
 
        event_value = event->value;
-       if (event_value > MAX_EVENT_VALUE) {
-               event_value = get_extended_event_value (event, next);
+       if (event_value == MAX_EVENT_VALUE) {
+               event_value = *((guint64*)next);
                next ++;
        }
        
@@ -1678,13 +2437,54 @@ write_event (ProfilerEventData *event) {
                event_data = element->id;
                
                if (event->code == MONO_PROFILER_EVENT_CLASS_ALLOCATION) {
-                       MONO_PROFILER_EVENT_MAKE_PACKED_CODE (event_code, event_data, MONO_PROFILER_PACKED_EVENT_CODE_CLASS_ALLOCATION);
+                       if ((! profiler->action_flags.save_allocation_caller) || (! (next->code == MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER))) {
+                               MONO_PROFILER_EVENT_MAKE_PACKED_CODE (event_code, event_data, MONO_PROFILER_PACKED_EVENT_CODE_CLASS_ALLOCATION);
+                       } else {
+                               MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, MONO_PROFILER_EVENT_JIT_TIME_ALLOCATION, event->kind, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+                       }
+                       
+                       if (profiler->action_flags.save_allocation_caller) {
+                               MonoMethod *caller_method = next->data.address;
+                               
+                               if ((next->code != MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER) && (next->code != MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER)) {
+                                       g_assert_not_reached ();
+                               }
+                               
+                               if (caller_method != NULL) {
+                                       MethodIdMappingElement *caller = method_id_mapping_element_get (caller_method);
+                                       g_assert (caller != NULL);
+                                       event_value_extension_1 = caller->id;
+                               }
+
+                               write_event_value_extension_1 = TRUE;
+                               next ++;
+                       }
+                       
+                       if (profiler->action_flags.allocations_carry_id) {
+                               event_value_extension_2  = GPOINTER_TO_UINT (next->data.address);
+                               
+                               if (next->code != MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID) {
+                                       g_assert_not_reached ();
+                               }
+                               
+                               write_event_value_extension_2 = TRUE;
+                               next ++;
+                       }
                } else {
                        MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, event->code, event->kind, MONO_PROFILER_PACKED_EVENT_CODE_CLASS_EVENT);
                }
        } else {
-               event_data = event->data.number;
-               MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, event->code, event->kind, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+               if (event->code == MONO_PROFILER_EVENT_STACK_SECTION) {
+                       return write_stack_section_event (event, data);
+               } else {
+                       event_data = event->data.number;
+                       MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, event->code, event->kind, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+               }
+       }
+       
+       /* Skip writing JIT events if the user did not ask for them */
+       if ((event->code == MONO_PROFILER_EVENT_METHOD_JIT) && ! profiler->action_flags.jit_time) {
+               return next;
        }
        
 #if (DEBUG_LOGGING_PROFILER)
@@ -1698,6 +2498,12 @@ write_event (ProfilerEventData *event) {
        write_uint64 (event_data);
        if (write_event_value) {
                write_uint64 (event_value);
+               if (write_event_value_extension_1) {
+                       write_uint64 (event_value_extension_1);
+               }
+               if (write_event_value_extension_2) {
+                       write_uint64 (event_value_extension_2);
+               }
        }
        
        return next;
@@ -1710,20 +2516,28 @@ write_thread_data_block (ProfilerPerThreadData *data) {
        
        if (start == end)
                return;
-       
+#if (DEBUG_FILE_WRITES)
+       printf ("write_thread_data_block: preparing buffer for thread %ld\n", (guint64) data->thread_id);
+#endif
        write_clock_data ();
        write_uint64 (data->thread_id);
        
        write_uint64 (data->start_event_counter);
        
+       /* Make sure that stack sections can be fully reconstructed even reading only one block */
+       rewrite_last_written_stack (&(data->stack));
+       
        while (start < end) {
-               start = write_event (start);
+               start = write_event (start, data);
        }
        WRITE_BYTE (0);
        data->first_unwritten_event = end;
        
        write_clock_data ();
        write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_EVENTS);
+#if (DEBUG_FILE_WRITES)
+       printf ("write_thread_data_block: buffer for thread %ld written\n", (guint64) data->thread_id);
+#endif
 }
 
 static ProfilerExecutableMemoryRegionData*
@@ -1735,24 +2549,44 @@ profiler_executable_memory_region_new (gpointer *start, gpointer *end, guint32 f
        result->file_name = g_strdup (file_name);
        result->id = id;
        result->is_new = TRUE;
+       
+       result->file = NULL;
+       result->file_region_reference = NULL;
+       result->symbols_capacity = id;
+       result->symbols_count = id;
+       result->symbols = NULL;
+       
        return result;
 }
 
+/* Forward declaration; the function is defined further down in this file. */
+static void
+executable_file_close (ProfilerExecutableMemoryRegionData *region);
+
+/*
+ * Destroys a region descriptor: drops its reference to the backing
+ * executable file (if one is attached), frees its symbol table and file name
+ * when present, and finally frees the descriptor itself.
+ */
 static void
 profiler_executable_memory_region_destroy (ProfilerExecutableMemoryRegionData *data) {
+       /* Done before file_name is freed: executable_file_close reads region->file_name. */
+       if (data->file != NULL) {
+               executable_file_close (data);
+               data->file = NULL;
+       }
+       if (data->symbols != NULL) {
+               g_free (data->symbols);
+               data->symbols = NULL;
+       }
        if (data->file_name != NULL) {
                g_free (data->file_name);
+               data->file_name = NULL;
        }
        g_free (data);
 }
 
+/*
+ * Allocates an empty region collection with room for 32 region pointers.
+ * next_id and next_unmanaged_function_id are stored as given in the fields
+ * of the same name.
+ */
 static ProfilerExecutableMemoryRegions*
-profiler_executable_memory_regions_new (int next_id) {
+profiler_executable_memory_regions_new (int next_id, int next_unmanaged_function_id) {
        ProfilerExecutableMemoryRegions *result = g_new (ProfilerExecutableMemoryRegions, 1);
        result->regions = g_new0 (ProfilerExecutableMemoryRegionData*, 32);
        result->regions_capacity = 32;
        result->regions_count = 0;
        result->next_id = next_id;
+       result->next_unmanaged_function_id = next_unmanaged_function_id;
        return result;
 }
 
@@ -1824,41 +2658,446 @@ append_region (ProfilerExecutableMemoryRegions *regions, gpointer *start, gpoint
        regions->next_id ++;
 }
 
+/*
+ * Tells whether two region descriptors describe the same mapping: same start
+ * and end address, same file offset and same file name.
+ */
+static gboolean
+regions_are_equivalent (ProfilerExecutableMemoryRegionData *region1, ProfilerExecutableMemoryRegionData *region2) {
+       if (region1->start != region2->start) {
+               return FALSE;
+       }
+       if (region1->end != region2->end) {
+               return FALSE;
+       }
+       if (region1->file_offset != region2->file_offset) {
+               return FALSE;
+       }
+       /* The names are compared last, only when all the cheaper fields match. */
+       return (strcmp (region1->file_name, region2->file_name) == 0) ? TRUE : FALSE;
+}
+
+/*
+ * qsort comparator for an array of region descriptor pointers: orders the
+ * descriptors by ascending start address.
+ */
+static int
+compare_regions (const void *a1, const void *a2) {
+       ProfilerExecutableMemoryRegionData *left = * (ProfilerExecutableMemoryRegionData**) a1;
+       ProfilerExecutableMemoryRegionData *right = * (ProfilerExecutableMemoryRegionData**) a2;
+
+       if (left->start < right->start) {
+               return -1;
+       }
+       if (left->start > right->start) {
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * For every descriptor in new_regions that is equivalent to a descriptor in
+ * old_regions, swaps the two pointers between the arrays: new_regions ends up
+ * holding the descriptor that was in old_regions, and old_regions the one
+ * that was in new_regions.
+ */
 static void
-restore_region_ids (ProfilerExecutableMemoryRegions *old_regions, ProfilerExecutableMemoryRegions *new_regions) {
+restore_old_regions (ProfilerExecutableMemoryRegions *old_regions, ProfilerExecutableMemoryRegions *new_regions) {
        int old_i;
        int new_i;
        
-       for (old_i = 0; old_i < old_regions->regions_count; old_i++) {
-               ProfilerExecutableMemoryRegionData *old_region = old_regions->regions [old_i];
-               for (new_i = 0; new_i < new_regions->regions_count; new_i++) {
-                       ProfilerExecutableMemoryRegionData *new_region = new_regions->regions [new_i];
-                       if ((old_region->start == new_region->start) &&
-                                       (old_region->end == new_region->end) &&
-                                       (old_region->file_offset == new_region->file_offset) &&
-                                       ! strcmp (old_region->file_name, new_region->file_name)) {
-                               new_region->is_new = FALSE;
-                               new_region->id = old_region->id;
-                               old_region->is_new = TRUE;
+       for (new_i = 0; new_i < new_regions->regions_count; new_i++) {
+               ProfilerExecutableMemoryRegionData *new_region = new_regions->regions [new_i];
+               /* NOTE(review): the scan does not stop at the first match; new_region keeps its initial value for the rest of the inner loop. */
+               for (old_i = 0; old_i < old_regions->regions_count; old_i++) {
+                       ProfilerExecutableMemoryRegionData *old_region = old_regions->regions [old_i];
+                       if ( regions_are_equivalent (old_region, new_region)) {
+                               new_regions->regions [new_i] = old_region;
+                               old_regions->regions [old_i] = new_region;
+                               
+                               // FIXME (sanity check)
+                               g_assert (new_region->is_new && ! old_region->is_new);
+                       }
+               }
+       }
+}
+
+/*
+ * Sorts the region array by start address (see compare_regions) and then
+ * removes adjacent equivalent entries. Of two equivalent neighbours the
+ * earlier one is destroyed when the later one is not marked is_new;
+ * otherwise the later one is destroyed.
+ */
+static void
+sort_regions (ProfilerExecutableMemoryRegions *regions) {
+       int index = 1;
+
+       /* With fewer than two regions there is nothing to sort or merge. */
+       if (regions->regions_count <= 1) {
+               return;
+       }
+
+       qsort (regions->regions, regions->regions_count, sizeof (ProfilerExecutableMemoryRegionData *), compare_regions);
+
+       while (index < regions->regions_count) {
+               ProfilerExecutableMemoryRegionData *previous = regions->regions [index - 1];
+               ProfilerExecutableMemoryRegionData *current = regions->regions [index];
+               int tail;
+
+               if (! regions_are_equivalent (previous, current)) {
+                       index ++;
+                       continue;
+               }
+
+               if (current->is_new) {
+                       profiler_executable_memory_region_destroy (current);
+               } else {
+                       profiler_executable_memory_region_destroy (previous);
+                       regions->regions [index - 1] = current;
+               }
+
+               /* Close the gap at position index; index itself is not advanced. */
+               for (tail = index; tail + 1 < regions->regions_count; tail ++) {
+                       regions->regions [tail] = regions->regions [tail + 1];
+               }
+               regions->regions_count --;
+       }
+}
+
+/*
+ * Makes every section record referenced by a region of this collection point
+ * back at that region.
+ */
+static void
+fix_region_references (ProfilerExecutableMemoryRegions *regions) {
+       int index = 0;
+
+       while (index < regions->regions_count) {
+               ProfilerExecutableMemoryRegionData *owner = regions->regions [index];
+
+               if (owner->file_region_reference != NULL) {
+                       owner->file_region_reference->region = owner;
+               }
+               index ++;
+       }
+}
+
+/*
+ * Walks the section headers of file, starting at index 1. For every section
+ * that has a non-zero address, carries the ELF_SHF_EXECINSTR flag and whose
+ * file extent lies inside the file range covered by region, the matching
+ * section record is filled in with the region, the section address and the
+ * section offset, and region->file_region_reference is set to that record.
+ */
+static void
+executable_file_add_region_reference (ProfilerExecutableFile *file, ProfilerExecutableMemoryRegionData *region) {
+       guint8 *headers_base = file->data + file->header->e_shoff;
+       int index;
+
+       for (index = 1; index < file->header->e_shnum; index ++) {
+               ElfSection *section = (ElfSection*) (headers_base + (file->header->e_shentsize * index));
+               ProfilerExecutableFileSectionRegion *record;
+
+               if ((section->sh_addr == 0) || ! (section->sh_flags & ELF_SHF_EXECINSTR)) {
+                       continue;
+               }
+               /* The section must start at or after the region's file offset... */
+               if (region->file_offset > section->sh_offset) {
+                       continue;
+               }
+               /* ...and end at or before the end of the region's file range. */
+               if (region->file_offset + (((guint8*)region->end)-((guint8*)region->start)) < (section->sh_offset + section->sh_size)) {
+                       continue;
+               }
+
+               record = & (file->section_regions [index]);
+               record->region = region;
+               record->section_address = (gpointer) section->sh_addr;
+               record->section_offset = section->sh_offset;
+               region->file_region_reference = record;
+       }
+}
+
+/*
+ * Returns the ProfilerExecutableFile backing the given region, attaching it
+ * to region->file.
+ *
+ * If the region has no file yet, the file is looked up by region->file_name
+ * in profiler->executable_files.table. When it is not there, a new record is
+ * created, registered in the table, linked into the new_files list and parsed
+ * as an ELF image (section headers, symbol table and string table locations).
+ * In both cases the record's reference count is incremented.
+ *
+ * A record is returned even when the file cannot be opened, stat'ed or
+ * mapped, or when it is not an ELF image matching the word size and byte
+ * order of this process. Such a record has header == NULL, and no region
+ * reference is added for it.
+ *
+ * NOTE(review): the table key is region->file_name itself, not a copy;
+ * confirm the key's lifetime against the creation of the table and against
+ * the destruction of this region while other regions still share the file.
+ */
+static ProfilerExecutableFile*
+executable_file_open (ProfilerExecutableMemoryRegionData *region) {
+       ProfilerExecutableFiles *files = & (profiler->executable_files);
+       ProfilerExecutableFile *file = region->file;
+
+       if (file == NULL) {
+               file = (ProfilerExecutableFile*) g_hash_table_lookup (files->table, region->file_name);
+
+               if (file == NULL) {
+                       guint16 test = 0x0102;
+                       struct stat stat_buffer;
+                       size_t region_length;
+                       int symtab_index = 0;
+                       int strtab_index = 0;
+                       int dynsym_index = 0;
+                       int dynstr_index = 0;
+                       ElfHeader *header;
+                       guint8 *section_headers;
+                       int section_index;
+                       int strings_index;
+
+                       /* Register the record first, so that failures below are remembered too. */
+                       file = g_new0 (ProfilerExecutableFile, 1);
+                       region->file = file;
+                       g_hash_table_insert (files->table, region->file_name, file);
+                       file->reference_count ++;
+                       file->next_new_file = files->new_files;
+                       files->new_files = file;
+
+                       file->fd = open (region->file_name, O_RDONLY);
+                       if (file->fd == -1) {
+                               //g_warning ("Cannot open file '%s': '%s'", region->file_name, strerror (errno));
+                               return file;
+                       }
+                       if (fstat (file->fd, &stat_buffer) != 0) {
+                               //g_warning ("Cannot stat file '%s': '%s'", region->file_name, strerror (errno));
+                               return file;
+                       }
+
+                       region_length = ((guint8*)region->end) - ((guint8*)region->start);
+                       file->length = stat_buffer.st_size;
+
+                       if (file->length == region_length) {
+                               /* The region has the same length as the file: read the image from the region itself. */
+                               file->data = region->start;
+                               close (file->fd);
+                               file->fd = -1;
+                       } else {
+                               file->data = mmap (NULL, file->length, PROT_READ, MAP_PRIVATE, file->fd, 0);
+
+                               if (file->data == MAP_FAILED) {
+                                       close (file->fd);
+                                       /* Forget the descriptor, so that executable_file_free does not close it a second time. */
+                                       file->fd = -1;
+                                       //g_warning ("Cannot map file '%s': '%s'", region->file_name, strerror (errno));
+                                       file->data = NULL;
+                                       return file;
+                               }
+                       }
+
+                       header = (ElfHeader*) file->data;
+
+                       if ((header->e_ident [EI_MAG0] != 0x7f) || (header->e_ident [EI_MAG1] != 'E') ||
+                                       (header->e_ident [EI_MAG2] != 'L') || (header->e_ident [EI_MAG3] != 'F')) {
+                               return file;
+                       }
+
+                       /* The ELF class must match the word size of this process. */
+                       if (sizeof (gsize) == 4) {
+                               if (header->e_ident [EI_CLASS] != ELF_CLASS_32) {
+                                       g_warning ("Class is not ELF_CLASS_32 with gsize size %d", (int) sizeof (gsize));
+                                       return file;
+                               }
+                       } else if (sizeof (gsize) == 8) {
+                               if (header->e_ident [EI_CLASS] != ELF_CLASS_64) {
+                                       g_warning ("Class is not ELF_CLASS_64 with gsize size %d", (int) sizeof (gsize));
+                                       return file;
+                               }
+                       } else {
+                               g_warning ("Absurd gsize size %d", (int) sizeof (gsize));
+                               return file;
+                       }
+
+                       /* The first byte of 0x0102 in memory tells the byte order of this process. */
+                       if ((*(guint8*)(&test)) == 0x01) {
+                               if (header->e_ident [EI_DATA] != ELF_DATA_MSB) {
+                                       g_warning ("Data is not ELF_DATA_MSB with first test byte 0x01");
+                                       return file;
+                               }
+                       } else if ((*(guint8*)(&test)) == 0x02) {
+                               if (header->e_ident [EI_DATA] != ELF_DATA_LSB) {
+                                       g_warning ("Data is not ELF_DATA_LSB with first test byte 0x02");
+                                       return file;
+                               }
+                       } else {
+                               g_warning ("Absurd test byte value");
+                               return file;
+                       }
+
+                       /* OK, this is a usable elf file... */
+                       file->header = header;
+                       section_headers = file->data + header->e_shoff;
+                       file->main_string_table = ((const char*) file->data) + (((ElfSection*) (section_headers + (header->e_shentsize * header->e_shstrndx)))->sh_offset);
+
+                       for (section_index = 0; section_index < header->e_shnum; section_index ++) {
+                               ElfSection *section_header = (ElfSection*) (section_headers + (header->e_shentsize * section_index));
+
+                               if (section_header->sh_type == ELF_SHT_SYMTAB) {
+                                       symtab_index = section_index;
+                               } else if (section_header->sh_type == ELF_SHT_DYNSYM) {
+                                       dynsym_index = section_index;
+                               } else if (section_header->sh_type == ELF_SHT_STRTAB) {
+                                       if (! strcmp (file->main_string_table + section_header->sh_name, ".strtab")) {
+                                               strtab_index = section_index;
+                                       } else if (! strcmp (file->main_string_table + section_header->sh_name, ".dynstr")) {
+                                               dynstr_index = section_index;
+                                       }
+                               }
+                       }
+
+                       /* Prefer the full symbol table; fall back to the dynamic one. */
+                       if ((symtab_index != 0) && (strtab_index != 0)) {
+                               section_index = symtab_index;
+                               strings_index = strtab_index;
+                       } else if ((dynsym_index != 0) && (dynstr_index != 0)) {
+                               section_index = dynsym_index;
+                               strings_index = dynstr_index;
+                       } else {
+                               section_index = 0;
+                               strings_index = 0;
+                       }
+
+                       if (section_index != 0) {
+                               ElfSection *section_header = (ElfSection*) (section_headers + (header->e_shentsize * section_index));
+
+                               /* A zero entry size would make the division fail: leave the file without symbols. */
+                               if (section_header->sh_entsize != 0) {
+                                       file->symbol_size = section_header->sh_entsize;
+                                       file->symbols_count = (guint32) (section_header->sh_size / section_header->sh_entsize);
+                                       file->symbols_start = file->data + section_header->sh_offset;
+                                       file->symbols_string_table = ((const char*) file->data) + (((ElfSection*) (section_headers + (header->e_shentsize * strings_index)))->sh_offset);
+                               }
+                       }
+
+                       file->section_regions = g_new0 (ProfilerExecutableFileSectionRegion, file->header->e_shnum);
+               } else {
+                       region->file = file;
+                       file->reference_count ++;
+               }
+       }
+
+       /* Only files that were recognized as usable ELF images have section records. */
+       if (file->header != NULL) {
+               executable_file_add_region_reference (file, region);
+       }
+
+       return file;
+}
+
+/*
+ * Releases a file record. When a descriptor is still recorded (fd != -1) it
+ * is closed and, if data is set, the data is unmapped; failures of either
+ * call are only reported with a warning. The section records and the record
+ * itself are then freed.
+ */
+static void
+executable_file_free (ProfilerExecutableFile* file) {
+       gboolean has_descriptor = (file->fd != -1);
+
+       if (has_descriptor && (close (file->fd) != 0)) {
+               g_warning ("Cannot close file: '%s'", strerror (errno));
+       }
+       /* Without a recorded descriptor the data is left alone. */
+       if (has_descriptor && (file->data != NULL) && (munmap (file->data, file->length) != 0)) {
+               g_warning ("Cannot unmap file: '%s'", strerror (errno));
+       }
+
+       if (file->section_regions != NULL) {
+               g_free (file->section_regions);
+               file->section_regions = NULL;
+       }
+       g_free (file);
+}
+
+/*
+ * Drops the reference held by region on its file. The section record that
+ * points back at this region (if any) is cleared. When no references remain,
+ * the file is removed from profiler->executable_files.table, freed, and
+ * region->file is reset to NULL.
+ */
+static void
+executable_file_close (ProfilerExecutableMemoryRegionData *region) {
+       region->file->reference_count --;
+
+       if (region->file_region_reference != NULL) {
+               if (region->file_region_reference->region == region) {
+                       region->file_region_reference->region = NULL;
+                       region->file_region_reference->section_address = 0;
+                       region->file_region_reference->section_offset = 0;
+               }
+       }
+
+       /* Other regions still use the file: leave it registered. */
+       if (region->file->reference_count > 0) {
+               return;
+       }
+
+       g_hash_table_remove (profiler->executable_files.table, region->file_name);
+       executable_file_free (region->file);
+       region->file = NULL;
+}
+
+/*
+ * Counting pass over the symbols of file. Every function symbol whose
+ * section index is above 0 and below e_shnum increments symbols_count of the
+ * region recorded for that section, provided the region exists and has no
+ * symbol table yet.
+ */
+static void
+executable_file_count_symbols (ProfilerExecutableFile *file) {
+       int index = 0;
+
+       while (index < file->symbols_count) {
+               ElfSymbol *symbol = (ElfSymbol*) (file->symbols_start + (index * file->symbol_size));
+
+               index ++;
+
+               if (ELF_ST_TYPE (symbol->st_info) != ELF_STT_FUNC) {
+                       continue;
+               }
+               if (! (symbol->st_shndx > 0) || ! (symbol->st_shndx < file->header->e_shnum)) {
+                       continue;
+               }
+
+               {
+                       int section = symbol->st_shndx;
+                       ProfilerExecutableMemoryRegionData *owner = file->section_regions [section].region;
+
+                       if ((owner != NULL) && (owner->symbols == NULL)) {
+                               owner->symbols_count ++;
+                       }
+               }
+       }
+}
+
+/*
+ * Allocates the symbol table of every region that has a positive
+ * symbols_count but no table yet. The count becomes the table capacity and
+ * is then reset to 0.
+ */
+static void
+executable_memory_regions_prepare_symbol_tables (ProfilerExecutableMemoryRegions *regions) {
+       int index;
+
+       for (index = 0; index < regions->regions_count; index ++) {
+               ProfilerExecutableMemoryRegionData *candidate = regions->regions [index];
+
+               if (! (candidate->symbols_count > 0)) {
+                       continue;
+               }
+               if (candidate->symbols != NULL) {
+                       continue;
+               }
+
+               candidate->symbols = g_new (ProfilerUnmanagedSymbol, candidate->symbols_count);
+               candidate->symbols_capacity = candidate->symbols_count;
+               candidate->symbols_count = 0;
+       }
+}
+
+/*
+ * Returns the name of symbol: the string found at the st_name offset of its
+ * ELF symbol entry inside the symbol string table of the region's file.
+ */
+static const char*
+executable_region_symbol_get_name (ProfilerExecutableMemoryRegionData *region, ProfilerUnmanagedSymbol *symbol) {
+       ProfilerExecutableFile *file = region->file;
+       ElfSymbol *entry = (ElfSymbol*) (file->symbols_start + (symbol->index * file->symbol_size));
+
+       return file->symbols_string_table + entry->st_name;
+}
+
+/*
+ * Filling pass over the symbols of file. Every function symbol whose section
+ * index is above 0 and below e_shnum, and whose section record has a region,
+ * is appended to that region's symbol table.
+ */
+static void
+executable_file_build_symbol_tables (ProfilerExecutableFile *file) {
+       int symbol_index;
+       
+       for (symbol_index = 0; symbol_index < file->symbols_count; symbol_index ++) {
+               ElfSymbol *symbol = (ElfSymbol*) (file->symbols_start + (symbol_index * file->symbol_size));
+               
+               if ((ELF_ST_TYPE (symbol->st_info) == ELF_STT_FUNC) &&
+                               (symbol->st_shndx > 0) &&
+                               (symbol->st_shndx < file->header->e_shnum)) {
+                       int symbol_section_index = symbol->st_shndx;
+                       ProfilerExecutableFileSectionRegion *section_region = & (file->section_regions [symbol_section_index]);
+                       ProfilerExecutableMemoryRegionData *region = section_region->region;
+                       
+                       /* NOTE(review): region->symbols is written without a NULL or capacity check; presumably the counting and allocation passes have already sized it - confirm against the caller. */
+                       if (region != NULL) {
+                               ProfilerUnmanagedSymbol *new_symbol = & (region->symbols [region->symbols_count]);
+                               region->symbols_count ++;
+                               
+                               new_symbol->id = 0;
+                               new_symbol->index = symbol_index;
+                               new_symbol->size = symbol->st_size;
+                               /* Symbol address relative to its section address, minus the distance between the region's file offset and the section's file offset. */
+                               new_symbol->offset = (((guint8*) symbol->st_value) - section_region->section_address) - (region->file_offset - section_region->section_offset);
                        }
                }
        }
 }
 
+/*
+ * qsort comparator for an array of ProfilerUnmanagedSymbol values: orders
+ * the symbols by ascending offset.
+ */
 static int
-compare_regions (const void *a1, const void *a2) {
-       ProfilerExecutableMemoryRegionData *r1 = * (ProfilerExecutableMemoryRegionData**) a1;
-       ProfilerExecutableMemoryRegionData *r2 = * (ProfilerExecutableMemoryRegionData**) a2;
-       return (r1->start < r2->start)? -1 : ((r1->start > r2->start)? 1 : 0);
+compare_region_symbols (const void *p1, const void *p2) {
+       const ProfilerUnmanagedSymbol *s1 = p1;
+       const ProfilerUnmanagedSymbol *s2 = p2;
+       return (s1->offset < s2->offset)? -1 : ((s1->offset > s2->offset)? 1 : 0);
 }
 
+/*
+ * Sorts, by ascending symbol offset, the symbol table of every region that
+ * is marked is_new and has a symbol table.
+ */
 static void
-sort_regions (ProfilerExecutableMemoryRegions *regions) {
-       qsort (regions->regions, regions->regions_count, sizeof (ProfilerExecutableMemoryRegionData *), compare_regions);
+executable_memory_regions_sort_symbol_tables (ProfilerExecutableMemoryRegions *regions) {
+       int i;
+       for (i = 0; i < regions->regions_count; i++) {
+               ProfilerExecutableMemoryRegionData *region = regions->regions [i];
+               if ((region->is_new) && (region->symbols != NULL)) {
+                       qsort (region->symbols, region->symbols_count, sizeof (ProfilerUnmanagedSymbol), compare_region_symbols);
+               }
+       }
+}
+
+/*
+ * Builds the symbol tables of the regions that need one:
+ *  1. opens the file of every region marked is_new that has no file yet;
+ *  2. counts the symbols of every file on the new_files list;
+ *  3. allocates the per-region tables;
+ *  4. fills them from the same files;
+ *  5. sorts them;
+ *  6. empties the new_files list.
+ */
+static void
+build_symbol_tables (ProfilerExecutableMemoryRegions *regions, ProfilerExecutableFiles *files) {
+       ProfilerExecutableFile *current;
+       ProfilerExecutableFile *following;
+       int index;
+
+       for (index = 0; index < regions->regions_count; index ++) {
+               ProfilerExecutableMemoryRegionData *candidate = regions->regions [index];
+
+               if (candidate->is_new && (candidate->file == NULL)) {
+                       executable_file_open (candidate);
+               }
+       }
+
+       for (current = files->new_files; current != NULL; current = current->next_new_file) {
+               executable_file_count_symbols (current);
+       }
+
+       executable_memory_regions_prepare_symbol_tables (regions);
+
+       for (current = files->new_files; current != NULL; current = current->next_new_file) {
+               executable_file_build_symbol_tables (current);
+       }
+
+       executable_memory_regions_sort_symbol_tables (regions);
+
+       /* Unlink every file from the list before dropping the list head. */
+       for (current = files->new_files; current != NULL; current = following) {
+               following = current->next_new_file;
+               current->next_new_file = NULL;
+       }
+       files->new_files = NULL;
+}
+
+/*
+ * Looks up the symbol of region whose range [offset, offset + size) contains
+ * the given offset, using a binary search over region->symbols. The table is
+ * expected to be sorted by ascending symbol offset.
+ *
+ * Returns the matching symbol, or NULL when the region has no symbols or no
+ * symbol contains the offset.
+ *
+ * The previous step-based search could stop on the wrong entry; for example,
+ * with two symbols and an offset inside the first one it returned NULL.
+ */
+static ProfilerUnmanagedSymbol*
+executable_memory_region_find_symbol (ProfilerExecutableMemoryRegionData *region, guint32 offset) {
+       int low = 0;
+       int high = ((int) region->symbols_count) - 1;
+
+       /* Invariant: a containing symbol, if any, has an index in [low, high]. */
+       while (low <= high) {
+               int middle = low + ((high - low) >> 1);
+               ProfilerUnmanagedSymbol *current = region->symbols + middle;
+
+               if (offset < current->offset) {
+                       high = middle - 1;
+               } else if (offset >= (current->offset + current->size)) {
+                       low = middle + 1;
+               } else {
+                       return current;
+               }
+       }
+
+       return NULL;
 }
 
 //FIXME: make also Win32 and BSD variants
 #define MAPS_BUFFER_SIZE 4096
+#define MAPS_FILENAME_SIZE 2048
 
 static gboolean
 update_regions_buffer (int fd, char *buffer) {
@@ -1887,9 +3126,9 @@ static int hex_digit_value (char c) {
        if ((c >= '0') && (c <= '9')) {
                return c - '0';
        } else if ((c >= 'a') && (c <= 'f')) {
-               return c - 'a';
+               return c - 'a' + 10;
        } else if ((c >= 'A') && (c <= 'F')) {
-               return c - 'A';
+               return c - 'A' + 10;
        } else {
                return 0;
        }
@@ -1937,13 +3176,12 @@ const char *map_line_parser_state [] = {
 };
 
 static char*
-parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer, char *current) {
+parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer, char *filename, char *current) {
        MapLineParserState state = MAP_LINE_PARSER_STATE_START_ADDRESS;
        gsize start_address = 0;
        gsize end_address = 0;
        guint32 offset = 0;
-       char *start_filename = NULL;
-       char *end_filename = NULL;
+       int filename_index = 0;
        gboolean is_executable = FALSE;
        gboolean done = FALSE;
        
@@ -2005,24 +3243,33 @@ parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer,
                        }
                        break;
                case MAP_LINE_PARSER_STATE_BLANK_BEFORE_FILENAME:
-                       if (c == '/') {
+                       if ((c == '/') || (c == '[')) {
                                state = MAP_LINE_PARSER_STATE_FILENAME;
-                               start_filename = current;
+                               filename [filename_index] = *current;
+                               filename_index ++;
                        } else if (! isblank (c)) {
                                state = MAP_LINE_PARSER_STATE_INVALID;
                        }
                        break;
                case MAP_LINE_PARSER_STATE_FILENAME:
-                       if (c == '\n') {
-                               state = MAP_LINE_PARSER_STATE_DONE;
-                               done = TRUE;
-                               end_filename = current;
+                       if (filename_index < MAPS_FILENAME_SIZE) {
+                               if (c == '\n') {
+                                       state = MAP_LINE_PARSER_STATE_DONE;
+                                       done = TRUE;
+                                       filename [filename_index] = 0;
+                               } else {
+                                       filename [filename_index] = *current;
+                                       filename_index ++;
+                               }
+                       } else {
+                               filename [filename_index] = 0;
+                               g_warning ("ELF filename too long: \"%s\"...\n", filename);
                        }
                        break;
                case MAP_LINE_PARSER_STATE_DONE:
                        if (done && is_executable) {
-                               *end_filename = 0;
-                               append_region (regions, (gpointer) start_address, (gpointer) end_address, offset, start_filename);
+                               filename [filename_index] = 0;
+                               append_region (regions, (gpointer) start_address, (gpointer) end_address, offset, filename);
                        }
                        return current;
                case MAP_LINE_PARSER_STATE_INVALID:
@@ -2032,9 +3279,10 @@ parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer,
                        break;
                }
                
-               
                if (c == 0) {
                        return NULL;
+               } else if (c == '\n') {
+                       state = MAP_LINE_PARSER_STATE_DONE;
                }
                
                GOTO_NEXT_CHAR(current, buffer, fd);
@@ -2045,6 +3293,7 @@ parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer,
 static gboolean
 scan_process_regions (ProfilerExecutableMemoryRegions *regions) {
        char *buffer;
+       char *filename;
        char *current;
        int fd;
        
@@ -2054,13 +3303,15 @@ scan_process_regions (ProfilerExecutableMemoryRegions *regions) {
        }
        
        buffer = malloc (MAPS_BUFFER_SIZE);
+       filename = malloc (MAPS_FILENAME_SIZE);
        update_regions_buffer (fd, buffer);
        current = buffer;
        while (current != NULL) {
-               current = parse_map_line (regions, fd, buffer, current);
+               current = parse_map_line (regions, fd, buffer, filename, current);
        }
        
        free (buffer);
+       free (filename);
        
        close (fd);
        return TRUE;
@@ -2071,22 +3322,55 @@ typedef enum {
        MONO_PROFILER_STATISTICAL_CODE_END = 0,
        MONO_PROFILER_STATISTICAL_CODE_METHOD = 1,
        MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_ID = 2,
-       MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_IN_REGION = 3,
+       MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_NEW_ID = 3,
+       MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_OFFSET_IN_REGION = 4,
+       MONO_PROFILER_STATISTICAL_CODE_CALL_CHAIN = 5,
        MONO_PROFILER_STATISTICAL_CODE_REGIONS = 7
 } MonoProfilerStatisticalCode;
 
 static void
 refresh_memory_regions (void) {
        ProfilerExecutableMemoryRegions *old_regions = profiler->executable_regions;
-       ProfilerExecutableMemoryRegions *new_regions = profiler_executable_memory_regions_new (old_regions->next_id);
+       ProfilerExecutableMemoryRegions *new_regions = profiler_executable_memory_regions_new (old_regions->next_id, old_regions->next_unmanaged_function_id);
        int i;
        
        LOG_WRITER_THREAD ("Refreshing memory regions...");
        scan_process_regions (new_regions);
-       restore_region_ids (old_regions, new_regions);
        sort_regions (new_regions);
+       restore_old_regions (old_regions, new_regions);
+       fix_region_references (new_regions);
        LOG_WRITER_THREAD ("Refreshed memory regions.");
        
+       LOG_WRITER_THREAD ("Building symbol tables...");
+       build_symbol_tables (new_regions, & (profiler->executable_files));
+#if 0
+       printf ("Symbol tables done!\n");
+       printf ("Region summary...\n");
+       for (i = 0; i < new_regions->regions_count; i++) {
+               ProfilerExecutableMemoryRegionData *region = new_regions->regions [i];
+               printf ("Region %d[%d][NEW:%d] (%p-%p) at %d in file %s\n", i, region->id, region->is_new,
+                               region->start, region->end, region->file_offset, region->file_name);
+       }
+       printf ("New symbol tables dump...\n");
+       for (i = 0; i < new_regions->regions_count; i++) {
+               ProfilerExecutableMemoryRegionData *region = new_regions->regions [i];
+               
+               if (region->is_new) {
+                       int symbol_index;
+                       
+                       printf ("Region %d[%d][NEW:%d] (%p-%p) at %d in file %s\n", i, region->id, region->is_new,
+                                       region->start, region->end, region->file_offset, region->file_name);
+                       for (symbol_index = 0; symbol_index < region->symbols_count; symbol_index ++) {
+                               ProfilerUnmanagedSymbol *symbol = & (region->symbols [symbol_index]);
+                               printf ("  [%d] Symbol %s (offset %d, size %d)\n", symbol_index,
+                                               executable_region_symbol_get_name (region, symbol),
+                                               symbol->offset, symbol->size);
+                       }
+               }
+       }
+#endif
+       LOG_WRITER_THREAD ("Built symbol tables.");
+       
        // This marks the region "sub-block"
        write_uint32 (MONO_PROFILER_STATISTICAL_CODE_REGIONS);
        
@@ -2112,8 +3396,8 @@ refresh_memory_regions (void) {
                        printf ("[refresh_memory_regions] Wrote region %d (%p-%p[%d] '%s')\n", region->id, region->start, region->end, region->file_offset, region->file_name);
 #endif
                        write_uint32 (region->id);
-                       write_uint64 (GPOINTER_TO_INT (region->start));
-                       write_uint32 (GPOINTER_TO_INT (region->end) - GPOINTER_TO_INT (region->start));
+                       write_uint64 (GPOINTER_TO_UINT (region->start));
+                       write_uint32 (GPOINTER_TO_UINT (region->end) - GPOINTER_TO_UINT (region->start));
                        write_uint32 (region->file_offset);
                        write_string (region->file_name);
                }
@@ -2125,96 +3409,137 @@ refresh_memory_regions (void) {
        profiler->executable_regions = new_regions;
 }
 
+/*
+ * Writes one statistical hit (a sampled code address) to the output buffer.
+ *
+ * domain: domain used for the JIT info lookup; when NULL the managed lookup
+ *         is skipped and the address is handled as unmanaged code.
+ * address: the sampled address.
+ * regions_refreshed: TRUE if the executable memory regions were already
+ *         refreshed by an earlier call; the refresh is attempted only while
+ *         this flag is FALSE.
+ *
+ * Returns the updated flag: TRUE if it was TRUE on entry or if this call
+ * refreshed the regions. Callers are expected to feed it into the next call.
+ *
+ * Encodings emitted (the low 3 bits carry the code, the rest the id):
+ *  - managed method with a mapping: (element id << 3) | CODE_METHOD
+ *  - managed method without mapping: bare CODE_METHOD
+ *  - unmanaged symbol already numbered: (symbol id << 3) | ..._FUNCTION_ID
+ *  - unmanaged symbol seen for the first time:
+ *        (region id << 3) | ..._FUNCTION_NEW_ID, then symbol id, then name
+ *  - address in a known region but no symbol:
+ *        (region id << 3) | ..._FUNCTION_OFFSET_IN_REGION, then offset
+ *  - address in no known region:
+ *        bare ..._FUNCTION_OFFSET_IN_REGION, then the full 64 bit address
+ */
+static gboolean
+write_statistical_hit (MonoDomain *domain, gpointer address, gboolean regions_refreshed) {
+       MonoJitInfo *ji = (domain != NULL) ? mono_jit_info_table_find (domain, (char*) address) : NULL;
+       
+       if (ji != NULL) {
+               MonoMethod *method = mono_jit_info_get_method (ji);
+               MethodIdMappingElement *element = method_id_mapping_element_get (method);
+               
+               if (element != NULL) {
+#if DEBUG_STATISTICAL_PROFILER
+                       printf ("[write_statistical_hit] Wrote method %d\n", element->id);
+#endif
+                       write_uint32 ((element->id << 3) | MONO_PROFILER_STATISTICAL_CODE_METHOD);
+               } else {
+#if DEBUG_STATISTICAL_PROFILER
+                       printf ("[write_statistical_hit] Wrote unknown method %p\n", method);
+#endif
+                       write_uint32 (MONO_PROFILER_STATISTICAL_CODE_METHOD);
+               }
+       } else {
+               ProfilerExecutableMemoryRegionData *region = find_address_region (profiler->executable_regions, address);
+               
+               /* Refresh the region map once, then retry the lookup. */
+               if (region == NULL && ! regions_refreshed) {
+#if DEBUG_STATISTICAL_PROFILER
+                       printf ("[write_statistical_hit] Cannot find region for address %p, refreshing...\n", address);
+#endif
+                       refresh_memory_regions ();
+                       regions_refreshed = TRUE;
+                       region = find_address_region (profiler->executable_regions, address);
+               }
+               
+               if (region != NULL) {
+                       guint32 offset = ((guint8*)address) - ((guint8*)region->start);
+                       ProfilerUnmanagedSymbol *symbol = executable_memory_region_find_symbol (region, offset);
+                       
+                       if (symbol != NULL) {
+                               if (symbol->id > 0) {
+#if DEBUG_STATISTICAL_PROFILER
+                                       printf ("[write_statistical_hit] Wrote unmanaged symbol %d\n", symbol->id);
+#endif
+                                       write_uint32 ((symbol->id << 3) | MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_ID);
+                               } else {
+                                       /* First hit on this symbol: assign the next id and emit its definition. */
+                                       ProfilerExecutableMemoryRegions *regions = profiler->executable_regions;
+                                       const char *symbol_name = executable_region_symbol_get_name (region, symbol);
+                                       symbol->id = regions->next_unmanaged_function_id;
+                                       regions->next_unmanaged_function_id ++;
+#if DEBUG_STATISTICAL_PROFILER
+                                       printf ("[write_statistical_hit] Wrote new unmanaged symbol in region %d[%d]\n", region->id, offset);
+#endif
+                                       write_uint32 ((region->id << 3) | MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_NEW_ID);
+                                       write_uint32 (symbol->id);
+                                       write_string (symbol_name);
+                               }
+                       } else {
+#if DEBUG_STATISTICAL_PROFILER
+                               printf ("[write_statistical_hit] Wrote unknown unmanaged hit in region %d[%d] (address %p)\n", region->id, offset, address);
+#endif
+                               write_uint32 ((region->id << 3) | MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_OFFSET_IN_REGION);
+                               write_uint32 (offset);
+                       }
+               } else {
+#if DEBUG_STATISTICAL_PROFILER
+                       printf ("[write_statistical_hit] Wrote unknown unmanaged hit %p\n", address);
+#endif
+                       write_uint32 (MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_OFFSET_IN_REGION);
+                       /*
+                        * GPOINTER_TO_UINT only preserves 32 bits, which would drop the
+                        * upper half of the address on 64 bit hosts: widen through gsize.
+                        */
+                       write_uint64 ((guint64) (gsize) address);
+               }
+       }
+       
+       return regions_refreshed;
+}
+
 static void
 flush_all_mappings (void);
 
 static void
 write_statistical_data_block (ProfilerStatisticalData *data) {
+       MonoThread *current_thread = mono_thread_current ();
        int start_index = data->first_unwritten_index;
        int end_index = data->next_free_index;
        gboolean regions_refreshed = FALSE;
+       int call_chain_depth = profiler->statistical_call_chain_depth;
        int index;
-       ProfilerUnmanagedFunctions *functions = &(profiler->unmanaged_functions);
        
        if (end_index > data->end_index)
                end_index = data->end_index;
        
        if (start_index == end_index)
-               return;
-       
-       data->first_unwritten_index = end_index;
-       
-       write_clock_data ();
-       
-       for (index = start_index; index < end_index; index ++) {
-               gpointer address = data->addresses [index];
-               MonoJitInfo *ji = mono_jit_info_table_find (mono_domain_get (), (char*) address);
-               
-               if (ji != NULL) {
-                       MonoMethod *method = mono_jit_info_get_method (ji);
-                       MethodIdMappingElement *element = method_id_mapping_element_get (method);
-                       
-                       if (element != NULL) {
-#if DEBUG_STATISTICAL_PROFILER
-                               printf ("[write_statistical_data_block] Wrote method %d\n", element->id);
-#endif
-                               write_uint32 ((element->id << 3) | MONO_PROFILER_STATISTICAL_CODE_METHOD);
-                       } else {
-#if DEBUG_STATISTICAL_PROFILER
-                               printf ("[write_statistical_data_block] Wrote unknown method %p\n", method);
-#endif
-                               write_uint32 (MONO_PROFILER_STATISTICAL_CODE_METHOD);
-                       }
-               } else {
-                       if (! unmanaged_function_hit (functions, address)) {
-                               ProfilerExecutableMemoryRegionData *region = find_address_region (profiler->executable_regions, address);
-                               
-                               if (region == NULL && ! regions_refreshed) {
-                                       refresh_memory_regions ();
-                                       regions_refreshed = TRUE;
-                                       region = find_address_region (profiler->executable_regions, address);
-                               }
-                               
-                               if (region != NULL) {
-#if DEBUG_STATISTICAL_PROFILER
-                                       printf ("[write_statistical_data_block] Wrote unmanaged hit %d[%d]\n", region->id, GPOINTER_TO_INT (address) - GPOINTER_TO_INT (region->start));
-#endif
-                                       write_uint32 ((region->id << 3) | MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_IN_REGION);
-                                       write_uint32 (GPOINTER_TO_INT (address) - GPOINTER_TO_INT (region->start));
-                               } else {
+               return;
+       
+       data->first_unwritten_index = end_index;
+       
+       write_clock_data ();
+       
 #if DEBUG_STATISTICAL_PROFILER
-                                       printf ("[write_statistical_data_block] Wrote unknown unmanaged hit %p\n", address);
+       printf ("[write_statistical_data_block] Starting loop at index %d\n", start_index);
 #endif
-                                       write_uint32 (MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_IN_REGION);
-                                       write_uint64 (GPOINTER_TO_INT (address));
-                               }
+       
+       for (index = start_index; index < end_index; index ++) {
+               int base_index = index * (call_chain_depth + 1);
+               ProfilerStatisticalHit hit = data->hits [base_index];
+               int callers_count;
+               
+               regions_refreshed = write_statistical_hit ((current_thread != NULL) ? hit.domain : NULL, hit.address, regions_refreshed);
+               base_index ++;
+               
+               for (callers_count = 0; callers_count < call_chain_depth; callers_count ++) {
+                       hit = data->hits [base_index + callers_count];
+                       if (hit.address == NULL) {
+                               break;
                        }
                }
-       }
-       if (functions->unwritten_queue != functions->unwritten_queue_end) {
-               ProfilerUnmanagedFunction *end = functions->unwritten_queue_end;
-               ProfilerUnmanagedFunction *function = functions->unwritten_queue;
-               functions->unwritten_queue = functions->unwritten_queue_end;
                
-               while (function != end) {
-                       ProfilerUnmanagedFunction *next = function->next_unwritten;
+               if (callers_count > 0) {
+                       write_uint32 ((callers_count << 3) | MONO_PROFILER_STATISTICAL_CODE_CALL_CHAIN);
                        
-                       write_uint32 ((function->id << 3) | MONO_PROFILER_STATISTICAL_CODE_UNMANAGED_FUNCTION_ID);
-                       if (function->name != NULL) {
-                               write_uint32 (0);
-                               write_string (function->name);
-                               g_free (function->name);
-                               function->name = NULL;
-                       }
-                       write_uint32 (function->hits);
-                       function->hits = 0;
-                       
-                       function->next_unwritten = NULL;
-                       function = next;
+                       for (callers_count = 0; callers_count < call_chain_depth; callers_count ++) {
+                               hit = data->hits [base_index + callers_count];
+                               if (hit.address != NULL) {
+                                       regions_refreshed = write_statistical_hit ((current_thread != NULL) ? hit.domain : NULL, hit.address, regions_refreshed);
+                               } else {
+                                       break;
+                               }
+                       }
                }
        }
        write_uint32 (MONO_PROFILER_STATISTICAL_CODE_END);
        
+#if DEBUG_STATISTICAL_PROFILER
+       printf ("[write_statistical_data_block] Ending loop at index %d\n", end_index);
+#endif
        write_clock_data ();
        
        write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_STATISTICAL);
@@ -2261,10 +3586,15 @@ update_mapping (ProfilerPerThreadData *data) {
                        MethodIdMappingElement *element = method_id_mapping_element_get (start->data.address);
                        if (element == NULL) {
                                MonoMethod *method = start->data.address;
-                               method_id_mapping_element_new (method);
+                               if (method != NULL) {
+                                       method_id_mapping_element_new (method);
+                               }
                        }
                }
                
+               if (start->value == MAX_EVENT_VALUE) {
+                       start ++;
+               }
                start ++;
        }
 #if (DEBUG_LOGGING_PROFILER)
@@ -2291,7 +3621,7 @@ flush_full_event_data_buffer (ProfilerPerThreadData *data) {
        // We flush all mappings because some id definitions could come
        // from other threads
        flush_all_mappings ();
-       g_assert (data->first_unmapped_event == data->end_event);
+       g_assert (data->first_unmapped_event >= data->next_free_event);
        
        write_thread_data_block (data);
        
@@ -2304,13 +3634,15 @@ flush_full_event_data_buffer (ProfilerPerThreadData *data) {
        UNLOCK_PROFILER ();
 }
 
-#define GET_NEXT_FREE_EVENT(d,e) {\
-       if ((d)->next_free_event >= (d)->end_event) {\
+/*
+ * RESERVE_EVENTS(d,e,count): reserves "count" consecutive event slots in the
+ * per thread data "d" and stores a pointer to the first one in "e".
+ * If the buffer cannot hold them, flush_full_event_data_buffer (d) is called
+ * before the slots are handed out.
+ * The ">=" operator is intentional, to leave one spare slot for "extended
+ * values": after a reservation at least one slot before end_event is free.
+ * "d" and "count" are evaluated more than once: pass side-effect free
+ * expressions. The body is a proper do/while (0) statement, so the macro is
+ * safe under an unbraced if/else; the caller supplies the final ';'.
+ */
+#define RESERVE_EVENTS(d,e,count) do {\
+       if ((d)->next_free_event >= ((d)->end_event - (count))) {\
                flush_full_event_data_buffer (d);\
        }\
        (e) = (d)->next_free_event;\
-       (d)->next_free_event ++;\
+       (d)->next_free_event += (count);\
 } while (0)
+/* Single slot form of RESERVE_EVENTS. */
+#define GET_NEXT_FREE_EVENT(d,e) RESERVE_EVENTS ((d),(e),1)
 
 static void
 flush_everything (void) {
@@ -2323,6 +3655,21 @@ flush_everything (void) {
        write_statistical_data_block (profiler->statistical_data);
 }
 
+/*
+ * Asks the writer thread to flush everything and waits until it reports
+ * completion.
+ * This assumes the lock is held: it just offloads the work to the writer thread.
+ * When CHECK_WRITER_THREAD () is false nothing is flushed here: the function
+ * only logs that there is no thread.
+ */
+static void
+writer_thread_flush_everything (void) {
+       if (CHECK_WRITER_THREAD ()) {
+               /* Set the request flag first, then raise the event and wait for the "done" event. */
+               profiler->writer_thread_flush_everything = TRUE;
+               LOG_WRITER_THREAD ("writer_thread_flush_everything: raising event...");
+               WRITER_EVENT_RAISE ();
+               LOG_WRITER_THREAD ("writer_thread_flush_everything: waiting event...");
+               WRITER_EVENT_DONE_WAIT ();
+               LOG_WRITER_THREAD ("writer_thread_flush_everything: got event.");
+       } else {
+               LOG_WRITER_THREAD ("writer_thread_flush_everything: no thread.");
+       }
+}
+
+/* Maps a result to a load event flag: MONO_PROFILE_OK gives ..._LOADED_EVENT_SUCCESS, any other value ..._LOADED_EVENT_FAILURE. */
 #define RESULT_TO_LOAD_CODE(r) (((r)==MONO_PROFILE_OK)?MONO_PROFILER_LOADED_EVENT_SUCCESS:MONO_PROFILER_LOADED_EVENT_FAILURE)
 static void
 appdomain_start_load (MonoProfiler *profiler, MonoDomain *domain) {
@@ -2347,7 +3694,7 @@ static void
 appdomain_start_unload (MonoProfiler *profiler, MonoDomain *domain) {
        LOCK_PROFILER ();
        loaded_element_unload_start (profiler->loaded_appdomains, domain);
-       flush_everything ();
+       writer_thread_flush_everything ();
        UNLOCK_PROFILER ();
 }
 
@@ -2374,8 +3721,11 @@ module_end_load (MonoProfiler *profiler, MonoImage *module, int result) {
        MonoAssemblyName aname;
        LoadedElement *element;
        
-       mono_assembly_fill_assembly_name (module, &aname);
-       name = mono_stringify_assembly_name (&aname);
+       if (mono_assembly_fill_assembly_name (module, &aname)) {
+               name = mono_stringify_assembly_name (&aname);
+       } else {
+               name = g_strdup_printf ("Dynamic module \"%p\"", module);
+       }
        LOCK_PROFILER ();
        element = loaded_element_load_end (profiler->loaded_modules, module, name);
        write_element_load_block (element, MONO_PROFILER_LOADED_EVENT_MODULE | RESULT_TO_LOAD_CODE (result), CURRENT_THREAD_ID ());
@@ -2386,7 +3736,7 @@ static void
 module_start_unload (MonoProfiler *profiler, MonoImage *module) {
        LOCK_PROFILER ();
        loaded_element_unload_start (profiler->loaded_modules, module);
-       flush_everything ();
+       writer_thread_flush_everything ();
        UNLOCK_PROFILER ();
 }
 
@@ -2413,8 +3763,11 @@ assembly_end_load (MonoProfiler *profiler, MonoAssembly *assembly, int result) {
        MonoAssemblyName aname;
        LoadedElement *element;
        
-       mono_assembly_fill_assembly_name (mono_assembly_get_image (assembly), &aname);
-       name = mono_stringify_assembly_name (&aname);
+       if (mono_assembly_fill_assembly_name (mono_assembly_get_image (assembly), &aname)) {
+               name = mono_stringify_assembly_name (&aname);
+       } else {
+               name = g_strdup_printf ("Dynamic assembly \"%p\"", assembly);
+       }
        LOCK_PROFILER ();
        element = loaded_element_load_end (profiler->loaded_assemblies, assembly, name);
        write_element_load_block (element, MONO_PROFILER_LOADED_EVENT_ASSEMBLY | RESULT_TO_LOAD_CODE (result), CURRENT_THREAD_ID ());
@@ -2425,7 +3778,7 @@ static void
 assembly_start_unload (MonoProfiler *profiler, MonoAssembly *assembly) {
        LOCK_PROFILER ();
        loaded_element_unload_start (profiler->loaded_assemblies, assembly);
-       flush_everything ();
+       writer_thread_flush_everything ();
        UNLOCK_PROFILER ();
 }
 static void
@@ -2450,11 +3803,14 @@ class_event_code_to_string (MonoProfilerClassEvents code) {
        }
 }
+/*
+ * Returns the label printed for a method event code.
+ * Any code not listed in the switch hits g_assert_not_reached ().
+ */
 static const char*
-method_event_code_to_string (MonoProfilerClassEvents code) {
+method_event_code_to_string (MonoProfilerMethodEvents code) {
        switch (code) {
        case MONO_PROFILER_EVENT_METHOD_CALL: return "CALL";
        case MONO_PROFILER_EVENT_METHOD_JIT: return "JIT";
        case MONO_PROFILER_EVENT_METHOD_FREED: return "FREED";
+       case MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER: return "ALLOCATION_CALLER";
+       case MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER: return "ALLOCATION_JIT_TIME_CALLER";
+       case MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID: return "ALLOCATION_OBJECT_ID";
        default: g_assert_not_reached (); return "";
        }
 }
@@ -2468,6 +3824,9 @@ number_event_code_to_string (MonoProfilerEvents code) {
        case MONO_PROFILER_EVENT_GC_RESIZE: return "GC_RESIZE";
        case MONO_PROFILER_EVENT_GC_STOP_WORLD: return "GC_STOP_WORLD";
        case MONO_PROFILER_EVENT_GC_START_WORLD: return "GC_START_WORLD";
+       case MONO_PROFILER_EVENT_JIT_TIME_ALLOCATION: return "JIT_TIME_ALLOCATION";
+       case MONO_PROFILER_EVENT_STACK_SECTION: return "STACK_SECTION";
+       case MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID: return "ALLOCATION_OBJECT_ID";
        default: g_assert_not_reached (); return "";
        }
 }
@@ -2488,12 +3847,12 @@ event_kind_to_string (MonoProfilerEventKind code) {
        }
 }
 static void
-print_event_data (gsize thread_id, ProfilerEventData *event, guint64 value) {
+print_event_data (ProfilerPerThreadData *data, ProfilerEventData *event, guint64 value) {
        if (event->data_type == MONO_PROFILER_EVENT_DATA_TYPE_CLASS) {
-               printf ("[TID %ld] CLASS[%p] event [%p] %s:%s:%s[%d-%d-%d] %ld (%s.%s)\n",
-                               thread_id,
+               printf ("STORE EVENT [TID %ld][EVENT %ld] CLASS[%p] %s:%s:%s[%d-%d-%d] %ld (%s.%s)\n",
+                               data->thread_id,
+                               event - data->events,
                                event->data.address,
-                               event,
                                class_event_code_to_string (event->code & ~MONO_PROFILER_EVENT_RESULT_MASK),
                                event_result_to_string (event->code & MONO_PROFILER_EVENT_RESULT_MASK),
                                event_kind_to_string (event->kind),
@@ -2504,10 +3863,10 @@ print_event_data (gsize thread_id, ProfilerEventData *event, guint64 value) {
                                mono_class_get_namespace ((MonoClass*) event->data.address),
                                mono_class_get_name ((MonoClass*) event->data.address));
        } else if (event->data_type == MONO_PROFILER_EVENT_DATA_TYPE_METHOD) {
-               printf ("[TID %ld] METHOD[%p] event [%p] %s:%s:%s[%d-%d-%d] %ld (%s.%s:%s (?))\n",
-                               thread_id,
+               printf ("STORE EVENT [TID %ld][EVENT %ld]  METHOD[%p] %s:%s:%s[%d-%d-%d] %ld (%s.%s:%s (?))\n",
+                               data->thread_id,
+                               event - data->events,
                                event->data.address,
-                               event,
                                method_event_code_to_string (event->code & ~MONO_PROFILER_EVENT_RESULT_MASK),
                                event_result_to_string (event->code & MONO_PROFILER_EVENT_RESULT_MASK),
                                event_kind_to_string (event->kind),
@@ -2515,14 +3874,14 @@ print_event_data (gsize thread_id, ProfilerEventData *event, guint64 value) {
                                event->kind,
                                event->code,
                                value,
-                               mono_class_get_namespace (mono_method_get_class ((MonoMethod*) event->data.address)),
-                               mono_class_get_name (mono_method_get_class ((MonoMethod*) event->data.address)),
-                               mono_method_get_name ((MonoMethod*) event->data.address));
+                               (event->data.address != NULL) ? mono_class_get_namespace (mono_method_get_class ((MonoMethod*) event->data.address)) : "<NULL>",
+                               (event->data.address != NULL) ? mono_class_get_name (mono_method_get_class ((MonoMethod*) event->data.address)) : "<NULL>",
+                               (event->data.address != NULL) ? mono_method_get_name ((MonoMethod*) event->data.address) : "<NULL>");
        } else {
-               printf ("[TID %ld] NUMBER[%ld] event [%p] %s:%s[%d-%d-%d] %ld\n",
-                               thread_id,
+               printf ("STORE EVENT [TID %ld][EVENT %ld]  NUMBER[%ld] %s:%s[%d-%d-%d] %ld\n",
+                               data->thread_id,
+                               event - data->events,
                                (guint64) event->data.number,
-                               event,
                                number_event_code_to_string (event->code),
                                event_kind_to_string (event->kind),
                                event->data_type,
@@ -2531,129 +3890,134 @@ print_event_data (gsize thread_id, ProfilerEventData *event, guint64 value) {
                                value);
        }
 }
-#define LOG_EVENT(tid,ev,val) print_event_data ((tid),(ev),(val))
+#define LOG_EVENT(data,ev,val) print_event_data ((data),(ev),(val))
 #else
-#define LOG_EVENT(tid,ev,val)
+#define LOG_EVENT(data,ev,val)
 #endif
 
+/* Maps a result to an event result flag: MONO_PROFILE_OK gives ..._EVENT_RESULT_SUCCESS, any other value ..._EVENT_RESULT_FAILURE. */
 #define RESULT_TO_EVENT_CODE(r) (((r)==MONO_PROFILE_OK)?MONO_PROFILER_EVENT_RESULT_SUCCESS:MONO_PROFILER_EVENT_RESULT_FAILURE)
 
-#define STORE_EVENT_ITEM_COUNTER(p,i,dt,c,k) do {\
-       ProfilerPerThreadData *data;\
-       ProfilerEventData *event;\
+/*
+ * Fills an already reserved event slot with an item (pointer) payload and
+ * the counter delta since the previous event.
+ *   event: slot to fill (the caller reserves it)
+ *   p:     not referenced by the expansion
+ *   i:     item stored in data.address
+ *   dt, c, k: data type, code and kind bitfields
+ * The expansion uses a variable named "data" from the caller's scope: it
+ * reads and updates data->last_event_counter and data->next_free_event.
+ * A delta below MAX_EVENT_VALUE goes into the "value" bitfield; otherwise
+ * "value" is set to MAX_EVENT_VALUE as a marker and the full 64 bit delta is
+ * written over the next event slot, which is consumed without a capacity
+ * check (this is what the spare slot left by RESERVE_EVENTS is for).
+ * NOTE(review): the definition ends with ';', so a use followed by ';' under
+ * an unbraced if/else does not behave as a single statement.
+ */
+#define STORE_EVENT_ITEM_COUNTER(event,p,i,dt,c,k) do {\
        guint64 counter;\
        guint64 delta;\
-       GET_PROFILER_THREAD_DATA (data);\
-       GET_NEXT_FREE_EVENT (data, event);\
        MONO_PROFILER_GET_CURRENT_COUNTER (counter);\
-       event->data.address = (i);\
-       event->data_type = (dt);\
-       event->code = (c);\
-       event->kind = (k);\
+       (event)->data.address = (i);\
+       (event)->data_type = (dt);\
+       (event)->code = (c);\
+       (event)->kind = (k);\
        delta = counter - data->last_event_counter;\
        if (delta < MAX_EVENT_VALUE) {\
-               event->value = delta;\
+               (event)->value = delta;\
        } else {\
                ProfilerEventData *extension = data->next_free_event;\
                data->next_free_event ++;\
-               event->value = delta >> 32;\
-               extension->data.number = delta & 0xffffffff;\
+               (event)->value = MAX_EVENT_VALUE;\
+               *(guint64*)extension = delta;\
        }\
        data->last_event_counter = counter;\
-       LOG_EVENT (data->thread_id, event, delta);\
+       LOG_EVENT (data, (event), delta);\
 } while (0);
-#define STORE_EVENT_ITEM_VALUE(p,i,dt,c,k,v) do {\
-       ProfilerPerThreadData *data;\
-       ProfilerEventData *event;\
-       GET_PROFILER_THREAD_DATA (data);\
-       GET_NEXT_FREE_EVENT (data, event);\
-       event->data.address = (i);\
-       event->data_type = (dt);\
-       event->code = (c);\
-       event->kind = (k);\
+/*
+ * Fills an already reserved event slot with an item (pointer) payload and
+ * an explicit value "v" (no counter is read).
+ *   event: slot to fill (the caller reserves it)
+ *   p:     not referenced by the expansion
+ *   i:     item stored in data.address
+ *   dt, c, k: data type, code and kind bitfields
+ *   v:     value; evaluated more than once
+ * The expansion uses a variable named "data" from the caller's scope.
+ * A value below MAX_EVENT_VALUE goes into the "value" bitfield; otherwise
+ * "value" is set to MAX_EVENT_VALUE as a marker and "v" is written as 64 bit
+ * over the next event slot (data->next_free_event, which is advanced).
+ * NOTE(review): the definition ends with ';' (see unbraced if/else uses).
+ */
+#define STORE_EVENT_ITEM_VALUE(event,p,i,dt,c,k,v) do {\
+       (event)->data.address = (i);\
+       (event)->data_type = (dt);\
+       (event)->code = (c);\
+       (event)->kind = (k);\
        if ((v) < MAX_EVENT_VALUE) {\
-               event->value = (v);\
+               (event)->value = (v);\
        } else {\
                ProfilerEventData *extension = data->next_free_event;\
                data->next_free_event ++;\
-               event->value = (v) >> 32;\
-               extension->data.number = (v) & 0xffffffff;\
+               (event)->value = MAX_EVENT_VALUE;\
+               *(guint64*)extension = (v);\
        }\
-       LOG_EVENT (data->thread_id, event, (v));\
+       LOG_EVENT (data, (event), (v));\
 }while (0);
-#define STORE_EVENT_NUMBER_COUNTER(p,n,dt,c,k) do {\
-       ProfilerPerThreadData *data;\
-       ProfilerEventData *event;\
+/*
+ * Fills an already reserved event slot with a numeric payload and the
+ * counter delta since the previous event.
+ *   event: slot to fill (the caller reserves it)
+ *   p:     not referenced by the expansion
+ *   n:     number stored in data.number
+ *   dt, c, k: data type, code and kind bitfields
+ * The expansion uses a variable named "data" from the caller's scope: it
+ * reads and updates data->last_event_counter and data->next_free_event.
+ * A delta below MAX_EVENT_VALUE goes into the "value" bitfield; otherwise
+ * "value" is set to MAX_EVENT_VALUE as a marker and the full 64 bit delta is
+ * written over the next event slot, which is consumed.
+ * NOTE(review): the definition ends with ';' (see unbraced if/else uses).
+ */
+#define STORE_EVENT_NUMBER_COUNTER(event,p,n,dt,c,k) do {\
        guint64 counter;\
        guint64 delta;\
-       GET_PROFILER_THREAD_DATA (data);\
-       GET_NEXT_FREE_EVENT (data, event);\
        MONO_PROFILER_GET_CURRENT_COUNTER (counter);\
-       event->data.number = (n);\
-       event->data_type = (dt);\
-       event->code = (c);\
-       event->kind = (k);\
+       (event)->data.number = (n);\
+       (event)->data_type = (dt);\
+       (event)->code = (c);\
+       (event)->kind = (k);\
        delta = counter - data->last_event_counter;\
        if (delta < MAX_EVENT_VALUE) {\
-               event->value = delta;\
+               (event)->value = delta;\
        } else {\
                ProfilerEventData *extension = data->next_free_event;\
                data->next_free_event ++;\
-               event->value = delta >> 32;\
-               extension->data.number = delta & 0xffffffff;\
+               (event)->value = MAX_EVENT_VALUE;\
+               *(guint64*)extension = delta;\
        }\
        data->last_event_counter = counter;\
-       LOG_EVENT (data->thread_id, event, delta);\
+       LOG_EVENT (data, (event), delta);\
 }while (0);
-#define STORE_EVENT_NUMBER_VALUE(p,n,dt,c,k,v) do {\
-       ProfilerPerThreadData *data;\
-       ProfilerEventData *event;\
-       GET_PROFILER_THREAD_DATA (data);\
-       GET_NEXT_FREE_EVENT (data, event);\
-       event->data.number = (n);\
-       event->data_type = (dt);\
-       event->code = (c);\
-       event->kind = (k);\
+/*
+ * Fills an already reserved event slot with a numeric payload and an
+ * explicit value "v" (no counter is read).
+ *   event: slot to fill (the caller reserves it)
+ *   p:     not referenced by the expansion
+ *   n:     number stored in data.number
+ *   dt, c, k: data type, code and kind bitfields
+ *   v:     value; evaluated more than once
+ * The expansion uses a variable named "data" from the caller's scope.
+ * A value below MAX_EVENT_VALUE goes into the "value" bitfield; otherwise
+ * "value" is set to MAX_EVENT_VALUE as a marker and "v" is written as 64 bit
+ * over the next event slot (data->next_free_event, which is advanced).
+ * NOTE(review): the definition ends with ';' (see unbraced if/else uses).
+ */
+#define STORE_EVENT_NUMBER_VALUE(event,p,n,dt,c,k,v) do {\
+       (event)->data.number = (n);\
+       (event)->data_type = (dt);\
+       (event)->code = (c);\
+       (event)->kind = (k);\
        if ((v) < MAX_EVENT_VALUE) {\
-               event->value = (v);\
+               (event)->value = (v);\
        } else {\
                ProfilerEventData *extension = data->next_free_event;\
                data->next_free_event ++;\
-               event->value = (v) >> 32;\
-               extension->data.number = (v) & 0xffffffff;\
+               (event)->value = MAX_EVENT_VALUE;\
+               *(guint64*)extension = (v);\
        }\
-       LOG_EVENT (data->thread_id, event, (v));\
+       LOG_EVENT (data, (event), (v));\
 }while (0);
 
-
+/*
+ * Stores a CLASS_LOAD / KIND_START event for klass, stamped with the counter
+ * delta, in the per thread data obtained through GET_PROFILER_THREAD_DATA.
+ * "data" must keep this name: STORE_EVENT_ITEM_COUNTER refers to it directly.
+ */
 static void
 class_start_load (MonoProfiler *profiler, MonoClass *klass) {
-       STORE_EVENT_ITEM_COUNTER (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_LOAD, MONO_PROFILER_EVENT_KIND_START);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_LOAD, MONO_PROFILER_EVENT_KIND_START);
 }
+/*
+ * Stores a CLASS_LOAD / KIND_END event for klass; the event code also carries
+ * the success or failure flag derived from "result" by RESULT_TO_EVENT_CODE.
+ * "data" must keep this name: STORE_EVENT_ITEM_COUNTER refers to it directly.
+ */
 static void
 class_end_load (MonoProfiler *profiler, MonoClass *klass, int result) {
-       STORE_EVENT_ITEM_COUNTER (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_LOAD | RESULT_TO_EVENT_CODE (result), MONO_PROFILER_EVENT_KIND_END);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_LOAD | RESULT_TO_EVENT_CODE (result), MONO_PROFILER_EVENT_KIND_END);
 }
+/*
+ * Stores a CLASS_UNLOAD / KIND_START event for klass, stamped with the
+ * counter delta.
+ * "data" must keep this name: STORE_EVENT_ITEM_COUNTER refers to it directly.
+ */
 static void
 class_start_unload (MonoProfiler *profiler, MonoClass *klass) {
-       STORE_EVENT_ITEM_COUNTER (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_UNLOAD, MONO_PROFILER_EVENT_KIND_START);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_UNLOAD, MONO_PROFILER_EVENT_KIND_START);
 }
+/*
+ * Stores a CLASS_UNLOAD / KIND_END event for klass, stamped with the counter
+ * delta.
+ * "data" must keep this name: STORE_EVENT_ITEM_COUNTER refers to it directly.
+ */
 static void
 class_end_unload (MonoProfiler *profiler, MonoClass *klass) {
-       STORE_EVENT_ITEM_COUNTER (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_UNLOAD, MONO_PROFILER_EVENT_KIND_END);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_UNLOAD, MONO_PROFILER_EVENT_KIND_END);
 }
 
+/*
+ * Stores a METHOD_JIT / KIND_START event for method and pushes the method on
+ * the per thread stack through thread_stack_push_jitted_safely (with TRUE as
+ * last argument). The matching pop is in method_end_jit.
+ * With this change the event is stored unconditionally: the previous
+ * action_flags.jit_time guard is gone.
+ * "data" must keep this name: STORE_EVENT_ITEM_COUNTER refers to it directly.
+ */
 static void
 method_start_jit (MonoProfiler *profiler, MonoMethod *method) {
-       if (profiler->action_flags.jit_time) {
-               STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_JIT, MONO_PROFILER_EVENT_KIND_START);
-       }
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       thread_stack_push_jitted_safely (&(data->stack), method, TRUE);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_JIT, MONO_PROFILER_EVENT_KIND_START);
 }
+/*
+ * Stores a METHOD_JIT / KIND_END event for method (the code also carries the
+ * success or failure flag derived from "result"), then pops the top entry of
+ * the per thread stack; the push is done by method_start_jit.
+ * With this change the event is stored unconditionally: the previous
+ * action_flags.jit_time guard is gone.
+ * "data" must keep this name: STORE_EVENT_ITEM_COUNTER refers to it directly.
+ */
 static void
 method_end_jit (MonoProfiler *profiler, MonoMethod *method, int result) {
-       if (profiler->action_flags.jit_time) {
-               STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_JIT | RESULT_TO_EVENT_CODE (result), MONO_PROFILER_EVENT_KIND_END);
-       }
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_JIT | RESULT_TO_EVENT_CODE (result), MONO_PROFILER_EVENT_KIND_END);
+       thread_stack_pop (&(data->stack));
 }
 
 #if (HAS_OPROFILE)
@@ -2679,50 +4043,203 @@ method_jit_result (MonoProfiler *prof, MonoMethod *method, MonoJitInfo* jinfo, i
 
 static void
 method_enter (MonoProfiler *profiler, MonoMethod *method) {
-       STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_CALL, MONO_PROFILER_EVENT_KIND_START);
+       ProfilerPerThreadData *data; /* method_enter: on method entry, optionally logs METHOD_CALL/START and optionally pushes method on the thread stack */
+       
+       CHECK_PROFILER_ENABLED (); /* macro defined elsewhere; presumably returns early while profiling is disabled (confirm) */
+       GET_PROFILER_THREAD_DATA (data);
+       if (profiler->action_flags.track_calls) { /* the call event is now opt-in; the removed line stored it unconditionally */
+               ProfilerEventData *event;
+               GET_NEXT_FREE_EVENT (data, event);
+               STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_CALL, MONO_PROFILER_EVENT_KIND_START);
+       }
+       if (profiler->action_flags.track_stack) { /* stack tracking is independent of track_calls */
+               thread_stack_push_safely (&(data->stack), method); /* method_leave pops under the same flag */
+       }
 }
 static void
 method_leave (MonoProfiler *profiler, MonoMethod *method) {
-       STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_CALL, MONO_PROFILER_EVENT_KIND_END);
+       ProfilerPerThreadData *data; /* method_leave: on method exit, optionally logs METHOD_CALL/END and optionally pops the thread stack */
+       
+       CHECK_PROFILER_ENABLED (); /* macro defined elsewhere; presumably returns early while profiling is disabled (confirm) */
+       GET_PROFILER_THREAD_DATA (data);
+       if (profiler->action_flags.track_calls) { /* the call event is now opt-in; the removed line stored it unconditionally */
+               ProfilerEventData *event;
+               GET_NEXT_FREE_EVENT (data, event);
+               STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_CALL, MONO_PROFILER_EVENT_KIND_END);
+       }
+       if (profiler->action_flags.track_stack) { /* same flag that guards the push in method_enter */
+               thread_stack_pop (&(data->stack)); /* pops without checking that the top frame is method */
+       }
 }
 
 static void
 method_free (MonoProfiler *profiler, MonoMethod *method) {
-       STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_FREED, 0);
+       ProfilerPerThreadData *data; /* method_free: logs a METHOD_FREED event for method */
+       ProfilerEventData *event; /* event slot filled in by STORE_EVENT_ITEM_COUNTER below */
+       GET_PROFILER_THREAD_DATA (data); /* macro defined elsewhere; presumably binds data to the calling thread's state (confirm) */
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_FREED, 0); /* kind argument is a literal 0: no START/END constant is passed for this event */
 }
 
 static void
 thread_start (MonoProfiler *profiler, gsize tid) {
-       STORE_EVENT_NUMBER_COUNTER (profiler, tid, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_THREAD, MONO_PROFILER_EVENT_KIND_START);
+       ProfilerPerThreadData *data; /* thread_start: logs a THREAD/START event whose number payload is tid */
+       ProfilerEventData *event; /* event slot filled in by STORE_EVENT_NUMBER_COUNTER below */
+       GET_PROFILER_THREAD_DATA (data); /* macro defined elsewhere; presumably binds data to the calling thread's state (confirm) */
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_NUMBER_COUNTER (event, profiler, tid, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_THREAD, MONO_PROFILER_EVENT_KIND_START); /* NUMBER variant: tid is stored as a number, not as an item pointer */
 }
 static void
 thread_end (MonoProfiler *profiler, gsize tid) {
-       STORE_EVENT_NUMBER_COUNTER (profiler, tid, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_THREAD, MONO_PROFILER_EVENT_KIND_END);
+       ProfilerPerThreadData *data; /* thread_end: logs a THREAD/END event whose number payload is tid */
+       ProfilerEventData *event; /* event slot filled in by STORE_EVENT_NUMBER_COUNTER below */
+       GET_PROFILER_THREAD_DATA (data); /* macro defined elsewhere; presumably binds data to the calling thread's state (confirm) */
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_NUMBER_COUNTER (event, profiler, tid, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_THREAD, MONO_PROFILER_EVENT_KIND_END); /* NUMBER variant: tid is stored as a number, not as an item pointer */
 }
 
 static void
 object_allocated (MonoProfiler *profiler, MonoObject *obj, MonoClass *klass) {
-       ProfilerPerThreadData *thread_data;
+       ProfilerPerThreadData *data; /* object_allocated: logs one CLASS_ALLOCATION event for obj plus optional stack, caller and object-id events, all reserved as one run of slots */
+       ProfilerEventData *events; /* cursor over the reserved slots; advanced as each event is stored */
+       int unsaved_frames; /* number of stack frames to emit; 0 unless save_allocation_stack is set */
+       int event_slot_count; /* total slots requested from RESERVE_EVENTS */
+       
+       GET_PROFILER_THREAD_DATA (data);
+       event_slot_count = 1; /* the CLASS_ALLOCATION event itself */
+       if (profiler->action_flags.save_allocation_caller) {
+               event_slot_count ++; /* one slot for the caller event */
+       }
+       if (profiler->action_flags.allocations_carry_id) {
+               event_slot_count ++; /* one slot for the object-id event */
+       }
+       if (profiler->action_flags.save_allocation_stack) {
+               unsaved_frames = thread_stack_count_unsaved_frames (&(data->stack));
+               event_slot_count += (unsaved_frames + 1); /* one slot per frame plus one STACK_SECTION header */
+       } else {
+               unsaved_frames = 0;
+       }
+       RESERVE_EVENTS (data, events, event_slot_count); /* macro defined elsewhere; leaves events at the first reserved slot */
+       
+       if (profiler->action_flags.save_allocation_stack) {
+               int i;
+               
+               STORE_EVENT_NUMBER_VALUE (events, profiler, data->stack.last_saved_top, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_STACK_SECTION, 0, unsaved_frames); /* header: number = last_saved_top, value = count of frame events that follow */
+               events++;
+               for (i = 0; i < unsaved_frames; i++) { /* i counts frames from the top of the stack */
+                       if (! thread_stack_index_from_top_is_jitted (&(data->stack), i)) {
+                               STORE_EVENT_ITEM_VALUE (events, profiler, thread_stack_index_from_top (&(data->stack), i), MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER, 0, 0);
+                       } else {
+                               STORE_EVENT_ITEM_VALUE (events, profiler, thread_stack_index_from_top (&(data->stack), i), MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER, 0, 0); /* frames flagged as jitted get the JIT_TIME variant of the caller code */
+                       }
+                       events ++;
+               }
+               
+               data->stack.last_saved_top = data->stack.top; /* remember how much of the stack has been emitted */
+       }
        
-       STORE_EVENT_ITEM_VALUE (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_ALLOCATION, 0, (guint64) mono_object_get_size (obj));
-       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot) {
-               GET_PROFILER_THREAD_DATA (thread_data);
-               STORE_ALLOCATED_OBJECT (thread_data, obj);
+       STORE_EVENT_ITEM_VALUE (events, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_ALLOCATION, 0, (guint64) mono_object_get_size (obj)); /* events is not advanced here; each optional event below pre-increments before storing */
+       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot || profiler->action_flags.collection_summary) { /* collection_summary is new in this condition */
+               STORE_ALLOCATED_OBJECT (data, obj);
+       }
+       
+       if (profiler->action_flags.save_allocation_caller) {
+               MonoMethod *caller = thread_stack_top (&(data->stack));
+               gboolean caller_is_jitted = thread_stack_top_is_jitted (&(data->stack));
+               int index = 1; /* next frame-from-top to inspect when the current one is skipped */
+               events ++;
+               
+               while ((caller != NULL) && (caller->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE)) { /* skip managed-to-native wrapper frames */
+                       caller = thread_stack_index_from_top (&(data->stack), index);
+                       caller_is_jitted = thread_stack_index_from_top_is_jitted (&(data->stack), index);
+                       index ++;
+               }
+               if (! caller_is_jitted) { /* NOTE(review): caller can be NULL here (the loop above also exits on NULL); confirm the reader accepts a NULL method item */
+                       STORE_EVENT_ITEM_VALUE (events, profiler, caller, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER, 0, 0);
+               } else {
+                       STORE_EVENT_ITEM_VALUE (events, profiler, caller, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER, 0, 0);
+               }
+       }
+       if (profiler->action_flags.allocations_carry_id) {
+               events ++;
+               STORE_EVENT_ITEM_VALUE (events, profiler, obj, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID, 0, 0); /* the object pointer itself is the stored item */
        }
 }
 
+static void
+statistical_call_chain (MonoProfiler *profiler, int call_chain_depth, guchar **ips, void *context) { /* records one statistical sample made of call_chain_depth addresses from ips; context is not used in this body */
+       MonoDomain *domain = mono_domain_get (); /* stored alongside every address of this sample */
+       ProfilerStatisticalData *data;
+       unsigned int index;
+       
+       CHECK_PROFILER_ENABLED (); /* macro defined elsewhere; presumably returns early while profiling is disabled (confirm) */
+       do {
+               data = profiler->statistical_data; /* re-read on every pass because the buffer may have been swapped */
+               index = InterlockedIncrement ((int*) &data->next_free_index); /* claims a sample slot; index is 1-based (see index - 1 below) */
+               
+               if (index <= data->end_index) {
+                       unsigned int base_index = (index - 1) * (profiler->statistical_call_chain_depth + 1); /* each sample owns statistical_call_chain_depth + 1 consecutive hits */
+                       unsigned int call_chain_index = 0;
+                       
+                       //printf ("[statistical_call_chain] (%d)\n", call_chain_depth);
+                       while (call_chain_index < call_chain_depth) { /* NOTE(review): signed call_chain_depth compared with unsigned, and nothing here caps it at the slots owned by this sample; confirm the caller guarantees the bound */
+                               ProfilerStatisticalHit *hit = & (data->hits [base_index + call_chain_index]);
+                               //printf ("[statistical_call_chain] [%d] = %p\n", base_index + call_chain_index, ips [call_chain_index]);
+                               hit->address = (gpointer) ips [call_chain_index];
+                               hit->domain = domain;
+                               call_chain_index ++;
+                       }
+                       while (call_chain_index <= profiler->statistical_call_chain_depth) { /* clear the remaining hits of this sample */
+                               ProfilerStatisticalHit *hit = & (data->hits [base_index + call_chain_index]);
+                               //printf ("[statistical_call_chain] [%d] = NULL\n", base_index + call_chain_index);
+                               hit->address = NULL;
+                               hit->domain = NULL;
+                               call_chain_index ++;
+                       }
+               } else {
+                       /* Check if we are the one that must swap the buffers */
+                       if (index == data->end_index + 1) { /* only the caller that got the first index past the end performs the swap */
+                               ProfilerStatisticalData *new_data;
+
+                               /* In the *impossible* case that the writer thread has not finished yet, */
+                               /* loop waiting for it and meanwhile lose all statistical events... */
+                               do {
+                                       /* First, wait that it consumed the ready buffer */
+                                       while (profiler->statistical_data_ready != NULL); /* busy-wait with an empty body; NOTE(review): relies on the field being re-read each iteration, confirm how it is declared */
+                                       /* Then, wait that it produced the free buffer */
+                                       new_data = profiler->statistical_data_second_buffer;
+                               } while (new_data == NULL);
+
+                               profiler->statistical_data_ready = data; /* hand the full buffer over */
+                               profiler->statistical_data = new_data; /* later samples go to the spare buffer */
+                               profiler->statistical_data_second_buffer = NULL;
+                               WRITER_EVENT_RAISE (); /* macro defined elsewhere; presumably wakes the writer thread (confirm) */
+                               /* Otherwise exit from the handler and drop the event... */
+                       } else {
+                               break; /* buffer full and another caller is swapping: this sample is dropped */
+                       }
+                       
+                       /* Loop again, hoping to acquire a free slot this time (otherwise the event will be dropped) */
+                       data = NULL; /* forces another pass of the outer do/while */
+               }
+       } while (data == NULL);
+}
 
 static void
 statistical_hit (MonoProfiler *profiler, guchar *ip, void *context) {
+       MonoDomain *domain = mono_domain_get ();
        ProfilerStatisticalData *data;
-       int index;
+       unsigned int index;
        
+       CHECK_PROFILER_ENABLED ();
        do {
                data = profiler->statistical_data;
-               index = InterlockedIncrement (&data->next_free_index);
+               index = InterlockedIncrement ((int*) &data->next_free_index);
                
                if (index <= data->end_index) {
-                       data->addresses [index - 1] = (gpointer) ip;
+                       ProfilerStatisticalHit *hit = & (data->hits [index - 1]);
+                       hit->address = (gpointer) ip;
+                       hit->domain = domain;
                } else {
                        /* Check if we are the one that must swap the buffers */
                        if (index == data->end_index + 1) {
@@ -2736,7 +4253,7 @@ statistical_hit (MonoProfiler *profiler, guchar *ip, void *context) {
                                        /* Then, wait that it produced the free buffer */
                                        new_data = profiler->statistical_data_second_buffer;
                                } while (new_data == NULL);
-
+                               
                                profiler->statistical_data_ready = data;
                                profiler->statistical_data = new_data;
                                profiler->statistical_data_second_buffer = NULL;
@@ -2912,64 +4429,74 @@ report_object_references (gpointer *start, ClassIdMappingElement *layout, Profil
 
 static void
 profiler_heap_report_object_reachable (ProfilerHeapShotWriteJob *job, MonoObject *obj) {
-       if (profiler->action_flags.heap_shot && (job != NULL)) {
+       if (job != NULL) { /* reports one reachable object into job: per-class summary counters and, optionally, the object with its live references; heap_shot is no longer tested here, only for the dump section below */
                MonoClass *klass = mono_object_get_class (obj);
-               int reference_counter = 0;
-               gpointer *reference_counter_location;
+               ClassIdMappingElement *class_id = class_id_mapping_element_get (klass); /* now looked up for every object; the removed lines did it only on the non-array path */
+               if (class_id == NULL) {
+                       printf ("profiler_heap_report_object_reachable: class %p (%s.%s) has no id\n", klass, mono_class_get_namespace (klass), mono_class_get_name (klass));
+               }
+               g_assert (class_id != NULL); /* a missing id is printed above and then asserted on */
                
-               WRITE_HEAP_SHOT_JOB_VALUE_WITH_CODE (job, obj, HEAP_CODE_OBJECT);
+               if (job->summary.capacity > 0) { /* summary accounting only when the job carries a summary table */
+                       guint32 id = class_id->id;
+                       g_assert (id < job->summary.capacity); /* id is used as the index into per_class_data */
+                       
+                       job->summary.per_class_data [id].reachable.instances ++;
+                       job->summary.per_class_data [id].reachable.bytes += mono_object_get_size (obj);
+               }
+               if (profiler->action_flags.heap_shot && job->dump_heap_data) { /* full dump only when heap_shot is on and this job dumps heap data; profiler is not a parameter, so it is a file-scope variable */
+                       int reference_counter = 0; /* number of references written after the object */
+                       gpointer *reference_counter_location; /* slot that receives reference_counter at the end */
+                       
+                       WRITE_HEAP_SHOT_JOB_VALUE_WITH_CODE (job, obj, HEAP_CODE_OBJECT);
 #if DEBUG_HEAP_PROFILER
-               printf ("profiler_heap_report_object_reachable: reported object %p at cursor %p\n", obj, (job->cursor - 1));
+                       printf ("profiler_heap_report_object_reachable: reported object %p at cursor %p\n", obj, (job->cursor - 1));
 #endif
-               WRITE_HEAP_SHOT_JOB_VALUE (job, NULL);
-               reference_counter_location = job->cursor - 1;
-               
-               if (mono_class_get_rank (klass)) {
-                       MonoArray *array = (MonoArray *) obj;
-                       MonoClass *element_class = mono_class_get_element_class (klass);
-                       ClassIdMappingElement *element_id = class_id_mapping_element_get (element_class);
+                       WRITE_HEAP_SHOT_JOB_VALUE (job, NULL); /* placeholder, overwritten with the reference count below */
+                       reference_counter_location = job->cursor - 1; /* address of the placeholder just written */
                        
-                       g_assert (element_id != NULL);
-                       if (element_id->data.layout.slots == CLASS_LAYOUT_NOT_INITIALIZED) {
-                               class_id_mapping_element_build_layout_bitmap (element_class, element_id);
-                       }
-                       if (! mono_class_is_valuetype (element_class)) {
-                               int length = mono_array_length (array);
-                               int i;
-                               for (i = 0; i < length; i++) {
-                                       MonoObject *array_element = mono_array_get (array, MonoObject*, i);
-                                       if ((array_element != NULL) && mono_object_is_alive (array_element)) {
-                                               reference_counter ++;
-                                               WRITE_HEAP_SHOT_JOB_VALUE (job, array_element);
+                       if (mono_class_get_rank (klass)) { /* non-zero rank: obj is an array */
+                               MonoArray *array = (MonoArray *) obj;
+                               MonoClass *element_class = mono_class_get_element_class (klass);
+                               ClassIdMappingElement *element_id = class_id_mapping_element_get (element_class);
+                               
+                               g_assert (element_id != NULL);
+                               if (element_id->data.layout.slots == CLASS_LAYOUT_NOT_INITIALIZED) { /* build the element layout on first use */
+                                       class_id_mapping_element_build_layout_bitmap (element_class, element_id);
+                               }
+                               if (! mono_class_is_valuetype (element_class)) { /* elements are object references: write each non-NULL live one */
+                                       int length = mono_array_length (array);
+                                       int i;
+                                       for (i = 0; i < length; i++) {
+                                               MonoObject *array_element = mono_array_get (array, MonoObject*, i);
+                                               if ((array_element != NULL) && mono_object_is_alive (array_element)) {
+                                                       reference_counter ++;
+                                                       WRITE_HEAP_SHOT_JOB_VALUE (job, array_element);
+                                               }
+                                       }
+                               } else if (element_id->data.layout.references > 0) { /* value-type elements that contain references: scan each element */
+                                       int length = mono_array_length (array);
+                                       int array_element_size = mono_array_element_size (klass);
+                                       int i;
+                                       for (i = 0; i < length; i++) {
+                                               gpointer array_element_address = mono_array_addr_with_size (array, array_element_size, i);
+                                               reference_counter += report_object_references (array_element_address, element_id, job); /* the return value is added to the count */
                                        }
                                }
-                       } else if (element_id->data.layout.references > 0) {
-                               int length = mono_array_length (array);
-                               int array_element_size = mono_array_element_size (klass);
-                               int i;
-                               for (i = 0; i < length; i++) {
-                                       gpointer array_element_address = mono_array_addr_with_size (array, array_element_size, i);
-                                       reference_counter += report_object_references (array_element_address, element_id, job);
+                       } else { /* not an array: scan the object's own fields using class_id fetched at the top */
+                               if (class_id->data.layout.slots == CLASS_LAYOUT_NOT_INITIALIZED) {
+                                       class_id_mapping_element_build_layout_bitmap (klass, class_id);
+                               }
+                               if (class_id->data.layout.references > 0) {
+                                       reference_counter += report_object_references ((gpointer)(((char*)obj) + sizeof (MonoObject)), class_id, job); /* scanning starts sizeof (MonoObject) bytes into obj */
                                }
                        }
-               } else {
-                       ClassIdMappingElement *class_id = class_id_mapping_element_get (klass);
-                       if (class_id == NULL) {
-                               printf ("profiler_heap_report_object_reachable: class %p (%s.%s) has no id\n", klass, mono_class_get_namespace (klass), mono_class_get_name (klass));
-                       }
-                       g_assert (class_id != NULL);
-                       if (class_id->data.layout.slots == CLASS_LAYOUT_NOT_INITIALIZED) {
-                               class_id_mapping_element_build_layout_bitmap (klass, class_id);
-                       }
-                       if (class_id->data.layout.references > 0) {
-                               reference_counter += report_object_references ((gpointer)(((char*)obj) + sizeof (MonoObject)), class_id, job);
-                       }
-               }
-               
-               *reference_counter_location = GINT_TO_POINTER (reference_counter);
+                       
+                       *reference_counter_location = GINT_TO_POINTER (reference_counter); /* back-patch the placeholder with the final count */
 #if DEBUG_HEAP_PROFILER
-               printf ("profiler_heap_report_object_reachable: updated reference_counter_location %p with value %d\n", reference_counter_location, reference_counter);
+                       printf ("profiler_heap_report_object_reachable: updated reference_counter_location %p with value %d\n", reference_counter_location, reference_counter);
 #endif
+               }
        }
 }
 static void
@@ -2978,15 +4505,31 @@ profiler_heap_report_object_unreachable (ProfilerHeapShotWriteJob *job, MonoObje
                MonoClass *klass = mono_object_get_class (obj);
                guint32 size = mono_object_get_size (obj);
                
+               if (job->summary.capacity > 0) {
+                       ClassIdMappingElement *class_id = class_id_mapping_element_get (klass);
+                       guint32 id;
+                       
+                       if (class_id == NULL) {
+                               printf ("profiler_heap_report_object_reachable: class %p (%s.%s) has no id\n", klass, mono_class_get_namespace (klass), mono_class_get_name (klass));
+                       }
+                       g_assert (class_id != NULL);
+                       id = class_id->id;
+                       g_assert (id < job->summary.capacity);
+                       
+                       job->summary.per_class_data [id].unreachable.instances ++;
+                       job->summary.per_class_data [id].unreachable.bytes += size;
+               }
+               if (profiler->action_flags.unreachable_objects && job->dump_heap_data) {
 #if DEBUG_HEAP_PROFILER
-               printf ("profiler_heap_report_object_unreachable: at job %p writing klass %p\n", job, klass);
+                       printf ("profiler_heap_report_object_unreachable: at job %p writing klass %p\n", job, klass);
 #endif
-               WRITE_HEAP_SHOT_JOB_VALUE_WITH_CODE (job, klass, HEAP_CODE_FREE_OBJECT_CLASS);
+                       WRITE_HEAP_SHOT_JOB_VALUE_WITH_CODE (job, klass, HEAP_CODE_FREE_OBJECT_CLASS);
        
 #if DEBUG_HEAP_PROFILER
-               printf ("profiler_heap_report_object_unreachable: at job %p writing size %p\n", job, GUINT_TO_POINTER (size));
+                       printf ("profiler_heap_report_object_unreachable: at job %p writing size %p\n", job, GUINT_TO_POINTER (size));
 #endif
-               WRITE_HEAP_SHOT_JOB_VALUE (job, GUINT_TO_POINTER (size));
+                       WRITE_HEAP_SHOT_JOB_VALUE (job, GUINT_TO_POINTER (size));
+               }
        }
 }
 
@@ -3064,8 +4607,15 @@ profiler_heap_scan (ProfilerHeapShotHeapBuffers *heap, ProfilerHeapShotWriteJob
        }
 }
 
+static inline gboolean
+heap_shot_write_job_should_be_created (gboolean dump_heap_data) { /* decides whether a heap-shot write job is needed for the current collection */
+       return dump_heap_data || profiler->action_flags.unreachable_objects || profiler->action_flags.collection_summary; /* true when a heap dump was requested or either flag is set; profiler is not a parameter here, so it is a file-scope variable */
+}
+
 static void
 handle_heap_profiling (MonoProfiler *profiler, MonoGCEvent ev) {
+       static gboolean dump_heap_data;
+       
        switch (ev) {
        case MONO_GC_EVENT_PRE_STOP_WORLD:
                // Get the lock, so we are sure nobody is flushing events during the collection,
@@ -3073,8 +4623,16 @@ handle_heap_profiling (MonoProfiler *profiler, MonoGCEvent ev) {
                LOCK_PROFILER ();
                break;
        case MONO_GC_EVENT_POST_STOP_WORLD:
-               // Update all mappings, so that we have built all the class descriptors.
-               flush_all_mappings ();
+               dump_heap_data = dump_current_heap_snapshot ();
+               if (heap_shot_write_job_should_be_created (dump_heap_data)) {
+                       ProfilerPerThreadData *data;
+                       // Update all mappings, so that we have built all the class descriptors.
+                       flush_all_mappings ();
+                       // Also write all event buffers, so that allocations are recorded.
+                       for (data = profiler->per_thread_data; data != NULL; data = data->next) {
+                               write_thread_data_block (data);
+                       }
+               }
                // Release lock...
                UNLOCK_PROFILER ();
                break;
@@ -3082,8 +4640,8 @@ handle_heap_profiling (MonoProfiler *profiler, MonoGCEvent ev) {
                ProfilerHeapShotWriteJob *job;
                ProfilerPerThreadData *data;
                
-               if (dump_current_heap_snapshot ()) {
-                       job = profiler_heap_shot_write_job_new (profiler->heap_shot_was_signalled);
+               if (heap_shot_write_job_should_be_created (dump_heap_data)) {
+                       job = profiler_heap_shot_write_job_new (profiler->heap_shot_was_signalled, dump_heap_data, profiler->garbage_collection_counter);
                        profiler->heap_shot_was_signalled = FALSE;
                        MONO_PROFILER_GET_CURRENT_COUNTER (job->start_counter);
                        MONO_PROFILER_GET_CURRENT_TIME (job->start_time);
@@ -3135,15 +4693,51 @@ handle_heap_profiling (MonoProfiler *profiler, MonoGCEvent ev) {
 
 static void
 gc_event (MonoProfiler *profiler, MonoGCEvent ev, int generation) {
-       STORE_EVENT_NUMBER_COUNTER (profiler, generation, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, gc_event_code_from_profiler_event (ev), gc_event_kind_from_profiler_event (ev));
-       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot) {
+       ProfilerPerThreadData *data; /* gc_event: logs one GC event carrying the collection counter and generation, and drives heap profiling around it */
+       ProfilerEventData *event; /* event slot filled in by STORE_EVENT_NUMBER_COUNTER below */
+       gboolean do_heap_profiling = profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot || profiler->action_flags.collection_summary; /* collection_summary is new in this condition */
+       guint32 event_value;
+       
+       if (ev == MONO_GC_EVENT_START) {
+               profiler->garbage_collection_counter ++; /* bumped once per MONO_GC_EVENT_START */
+       }
+       
+       event_value = (profiler->garbage_collection_counter << 8) | generation; /* counter shifted left by 8 with generation OR'ed in; NOTE(review): assumes generation fits in 8 bits, confirm */
+       
+       if (do_heap_profiling && (ev == MONO_GC_EVENT_POST_STOP_WORLD)) { /* for POST_STOP_WORLD the heap handling runs before the event is stored */
+               handle_heap_profiling (profiler, ev);
+       }
+       
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_NUMBER_COUNTER (event, profiler, event_value, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, gc_event_code_from_profiler_event (ev), gc_event_kind_from_profiler_event (ev)); /* event code and kind are both derived from ev */
+       
+       if (do_heap_profiling && (ev != MONO_GC_EVENT_POST_STOP_WORLD)) { /* for every other event the heap handling runs after the event is stored */
                handle_heap_profiling (profiler, ev);
        }
 }
 
 static void
 gc_resize (MonoProfiler *profiler, gint64 new_size) {
-       STORE_EVENT_NUMBER_COUNTER (profiler, new_size, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_GC_RESIZE, 0);
+       ProfilerPerThreadData *data; /* gc_resize: logs a GC_RESIZE event with new_size as the number and the collection counter as the value */
+       ProfilerEventData *event; /* event slot filled in by STORE_EVENT_NUMBER_VALUE below */
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       profiler->garbage_collection_counter ++; /* NOTE(review): the same counter is also bumped in gc_event on MONO_GC_EVENT_START; confirm a resize is meant to advance it too */
+       STORE_EVENT_NUMBER_VALUE (event, profiler, new_size, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_GC_RESIZE, 0, profiler->garbage_collection_counter); /* VALUE variant replaces the removed COUNTER variant; the last argument is the already incremented counter */
+}
+
+static void
+runtime_initialized (MonoProfiler *profiler) { /* runs the writer-thread enable handshake, then registers the Mono.Profiler.RuntimeControls internal calls; profiler is not referenced directly in this body */
+       LOG_WRITER_THREAD ("runtime_initialized: waking writer thread to enable it...\n");
+       WRITER_EVENT_ENABLE_RAISE (); /* macro defined elsewhere; per the log text it wakes the writer thread */
+       LOG_WRITER_THREAD ("runtime_initialized: waiting writer thread...\n");
+       WRITER_EVENT_DONE_WAIT (); /* macro defined elsewhere; per the log text it waits for the writer thread */
+       LOG_WRITER_THREAD ("runtime_initialized: writer thread enabled.\n");
+       mono_add_internal_call ("Mono.Profiler.RuntimeControls::EnableProfiler", enable_profiler); /* binds the managed method name to the native function */
+       mono_add_internal_call ("Mono.Profiler.RuntimeControls::DisableProfiler", disable_profiler);
+       mono_add_internal_call ("Mono.Profiler.RuntimeControls::TakeHeapSnapshot", request_heap_snapshot);
+       LOG_WRITER_THREAD ("runtime_initialized: initialized internal calls.\n");
 }
 
 /* called at the end of the program */
@@ -3151,6 +4745,7 @@ static void
 profiler_shutdown (MonoProfiler *prof)
 {
        ProfilerPerThreadData* current_thread_data;
+       ProfilerPerThreadData* next_thread_data;
        
        LOG_WRITER_THREAD ("profiler_shutdown: zeroing relevant flags");
        mono_profiler_set_events (0);
@@ -3167,15 +4762,14 @@ profiler_shutdown (MonoProfiler *prof)
        WRITER_EVENT_DESTROY ();
        
        LOCK_PROFILER ();
-       
+       flush_everything ();
        MONO_PROFILER_GET_CURRENT_TIME (profiler->end_time);
        MONO_PROFILER_GET_CURRENT_COUNTER (profiler->end_counter);
-       
-       flush_everything ();
        write_end_block ();
        FLUSH_FILE ();
        CLOSE_FILE();
        UNLOCK_PROFILER ();
+       
        g_free (profiler->file_name);
        if (profiler->file_name_suffix != NULL) {
                g_free (profiler->file_name_suffix);
@@ -3189,7 +4783,8 @@ profiler_shutdown (MonoProfiler *prof)
        
        FREE_PROFILER_THREAD_DATA ();
        
-       for (current_thread_data = profiler->per_thread_data; current_thread_data != NULL; current_thread_data = current_thread_data->next) {
+       for (current_thread_data = profiler->per_thread_data; current_thread_data != NULL; current_thread_data = next_thread_data) {
+               next_thread_data = current_thread_data->next;
                profiler_per_thread_data_destroy (current_thread_data);
        }
        if (profiler->statistical_data != NULL) {
@@ -3204,7 +4799,6 @@ profiler_shutdown (MonoProfiler *prof)
        if (profiler->executable_regions != NULL) {
                profiler_executable_memory_regions_destroy (profiler->executable_regions);
        }
-       unmanaged_functions_dispose (&(profiler->unmanaged_functions));
        
        profiler_heap_buffers_free (&(profiler->heap));
        if (profiler->heap_shot_command_file_name != NULL) {
@@ -3226,18 +4820,61 @@ profiler_shutdown (MonoProfiler *prof)
        profiler = NULL;
 }
 
+#ifndef PLATFORM_WIN32
+static int
+parse_signal_name (const char *signal_name) { /* maps a signal name or a decimal string to a signal number */
+       if (! strcasecmp (signal_name, "SIGUSR1")) { /* name matching is case-insensitive */
+               return SIGUSR1;
+       } else if (! strcasecmp (signal_name, "SIGUSR2")) {
+               return SIGUSR2;
+       } else if (! strcasecmp (signal_name, "SIGPROF")) {
+               return SIGPROF;
+       } else {
+               return atoi (signal_name); /* anything else is read as a number; atoi yields 0 for non-numeric text and reports no error */
+       }
+}
+static gboolean
+check_signal_number (int signal_number) { /* returns TRUE when signal_number is one the profiler accepts; reads the file-scope profiler variable */
+       if (((signal_number == SIGPROF) && ! (profiler->flags & MONO_PROFILE_STATISTICAL)) || /* SIGPROF is accepted only while MONO_PROFILE_STATISTICAL is not set; presumably because statistical mode uses SIGPROF itself (confirm) */
+                       (signal_number == SIGUSR1) ||
+                       (signal_number == SIGUSR2)) {
+               return TRUE;
+       } else {
+               return FALSE; /* every other number is rejected, including the 0 that parse_signal_name returns for unparsable text */
+       }
+}
+#endif
+
+#define FAIL_ARGUMENT_CHECK(message) do {\
+       failure_message = (message);\
+       goto failure_handling;\
+} while (0) /* FAIL_ARGUMENT_CHECK: sets failure_message and jumps to the failure_handling label; both must exist in the scope where the macro expands */
+#define FAIL_PARSING_VALUED_ARGUMENT FAIL_ARGUMENT_CHECK("cannot parse valued argument %s") /* the message holds one %s; presumably filled with the argument text at failure_handling (confirm) */
+#define FAIL_PARSING_FLAG_ARGUMENT FAIL_ARGUMENT_CHECK("cannot parse flag argument %s")
+#define CHECK_CONDITION(condition,message) do {\
+       gboolean result = (condition);\
+       if (result) {\
+               FAIL_ARGUMENT_CHECK (message);\
+       }\
+} while (0) /* CHECK_CONDITION: fails with message when condition is TRUE, so the condition describes the error case */
+#define FAIL_IF_HAS_MINUS CHECK_CONDITION(has_minus,"minus ('-') modifier not allowed for argument %s") /* reads the has_minus local of the expanding scope */
+#define TRUE_IF_NOT_MINUS ((!has_minus)?TRUE:FALSE) /* TRUE unless the argument had a '-' prefix; reads the has_minus local of the expanding scope */
+
 #define DEFAULT_ARGUMENTS "s"
 static void
 setup_user_options (const char *arguments) {
        gchar **arguments_array, **current_argument;
 #ifndef PLATFORM_WIN32
        int gc_request_signal_number = 0;
+       int toggle_signal_number = 0;
 #endif
+       detect_fast_timer ();
        
        profiler->file_name = NULL;
        profiler->file_name_suffix = NULL;
        profiler->per_thread_buffer_size = 10000;
        profiler->statistical_buffer_size = 10000;
+       profiler->statistical_call_chain_depth = 0;
        profiler->write_buffer_size = 1024;
        profiler->heap_shot_command_file_name = NULL;
        profiler->dump_next_heap_snapshots = 0;
@@ -3247,7 +4884,9 @@ setup_user_options (const char *arguments) {
                        MONO_PROFILE_ASSEMBLY_EVENTS|
                        MONO_PROFILE_MODULE_EVENTS|
                        MONO_PROFILE_CLASS_EVENTS|
-                       MONO_PROFILE_METHOD_EVENTS;
+                       MONO_PROFILE_METHOD_EVENTS|
+                       MONO_PROFILE_JIT_COMPILATION;
+       profiler->profiler_enabled = TRUE;
        
        if (arguments == NULL) {
                arguments = DEFAULT_ARGUMENTS;
@@ -3263,109 +4902,228 @@ setup_user_options (const char *arguments) {
        for (current_argument = arguments_array; ((current_argument != NULL) && (current_argument [0] != 0)); current_argument ++) {
                char *argument = *current_argument;
                char *equals = strstr (argument, "=");
+               const char *failure_message = NULL;
+               gboolean has_plus;
+               gboolean has_minus;
+               
+               if (*argument == '+') {
+                       has_plus = TRUE;
+                       has_minus = FALSE;
+                       argument ++;
+               } else if (*argument == '-') {
+                       has_plus = FALSE;
+                       has_minus = TRUE;
+                       argument ++;
+               } else {
+                       has_plus = FALSE;
+                       has_minus = FALSE;
+               }
                
                if (equals != NULL) {
                        int equals_position = equals - argument;
                        
                        if (! (strncmp (argument, "per-thread-buffer-size", equals_position) && strncmp (argument, "tbs", equals_position))) {
                                int value = atoi (equals + 1);
+                               FAIL_IF_HAS_MINUS;
                                if (value > 0) {
                                        profiler->per_thread_buffer_size = value;
                                }
+                       } else if (! (strncmp (argument, "statistical", equals_position) && strncmp (argument, "stat", equals_position) && strncmp (argument, "s", equals_position))) {
+                               int value = atoi (equals + 1);
+                               FAIL_IF_HAS_MINUS;
+                               if (value > 0) {
+                                       if (value > 16) {
+                                               value = 16;
+                                       }
+                                       profiler->statistical_call_chain_depth = value;
+                                       profiler->flags |= MONO_PROFILE_STATISTICAL;
+                               }
                        } else if (! (strncmp (argument, "statistical-thread-buffer-size", equals_position) && strncmp (argument, "sbs", equals_position))) {
                                int value = atoi (equals + 1);
+                               FAIL_IF_HAS_MINUS;
                                if (value > 0) {
                                        profiler->statistical_buffer_size = value;
                                }
                        } else if (! (strncmp (argument, "write-buffer-size", equals_position) && strncmp (argument, "wbs", equals_position))) {
                                int value = atoi (equals + 1);
+                               FAIL_IF_HAS_MINUS;
                                if (value > 0) {
                                        profiler->write_buffer_size = value;
                                }
                        } else if (! (strncmp (argument, "output", equals_position) && strncmp (argument, "out", equals_position) && strncmp (argument, "o", equals_position) && strncmp (argument, "O", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
                                if (strlen (equals + 1) > 0) {
                                        profiler->file_name = g_strdup (equals + 1);
                                }
                        } else if (! (strncmp (argument, "output-suffix", equals_position) && strncmp (argument, "suffix", equals_position) && strncmp (argument, "os", equals_position) && strncmp (argument, "OS", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
                                if (strlen (equals + 1) > 0) {
                                        profiler->file_name_suffix = g_strdup (equals + 1);
                                }
+                       } else if (! (strncmp (argument, "heap-shot", equals_position) && strncmp (argument, "heap", equals_position) && strncmp (argument, "h", equals_position))) {
+                               char *parameter = equals + 1;
+                               FAIL_IF_HAS_MINUS; /* Reject a '-' modifier before any profiler state is modified */
+                               if (! strcmp (parameter, "all")) {
+                                       profiler->dump_next_heap_snapshots = -1;
+                               } else {
+                                       gc_request_signal_number = parse_signal_name (parameter); /* NOTE(review): gc_request_signal_number is declared only when PLATFORM_WIN32 is not defined, but this branch is outside that guard */
+                               }
+                               if (! has_plus) { /* Without a '+' prefix the allocation-tracking action flags are switched on as well */
+                                       profiler->action_flags.save_allocation_caller = TRUE;
+                                       profiler->action_flags.save_allocation_stack = TRUE;
+                                       profiler->action_flags.allocations_carry_id = TRUE_IF_NOT_MINUS;
+                               }
+                               profiler->action_flags.heap_shot = TRUE_IF_NOT_MINUS;
                        } else if (! (strncmp (argument, "gc-commands", equals_position) && strncmp (argument, "gc-c", equals_position) && strncmp (argument, "gcc", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
                                if (strlen (equals + 1) > 0) {
                                        profiler->heap_shot_command_file_name = g_strdup (equals + 1);
                                }
                        } else if (! (strncmp (argument, "gc-dumps", equals_position) && strncmp (argument, "gc-d", equals_position) && strncmp (argument, "gcd", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
                                if (strlen (equals + 1) > 0) {
                                        profiler->dump_next_heap_snapshots = atoi (equals + 1);
                                }
 #ifndef PLATFORM_WIN32
                        } else if (! (strncmp (argument, "gc-signal", equals_position) && strncmp (argument, "gc-s", equals_position) && strncmp (argument, "gcs", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
                                if (strlen (equals + 1) > 0) {
                                        char *signal_name = equals + 1;
-                                       if (! strcasecmp (signal_name, "SIGUSR1")) {
-                                               gc_request_signal_number = SIGUSR1;
-                                       } else if (! strcasecmp (signal_name, "SIGUSR2")) {
-                                               gc_request_signal_number = SIGUSR2;
-                                       } else if (! strcasecmp (signal_name, "SIGPROF")) {
-                                               gc_request_signal_number = SIGPROF;
-                                       } else {
-                                               gc_request_signal_number = atoi (signal_name);
-                                       }
+                                       gc_request_signal_number = parse_signal_name (signal_name);
+                               }
+                       } else if (! (strncmp (argument, "toggle-signal", equals_position) && strncmp (argument, "ts", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
+                               if (strlen (equals + 1) > 0) {
+                                       char *signal_name = equals + 1;
+                                       toggle_signal_number = parse_signal_name (signal_name);
                                }
 #endif
                        } else {
-                               g_warning ("Cannot parse valued argument %s\n", argument);
+                               FAIL_PARSING_VALUED_ARGUMENT;
                        }
                } else {
                        if (! (strcmp (argument, "jit") && strcmp (argument, "j"))) {
-                               profiler->flags |= MONO_PROFILE_JIT_COMPILATION;
-                               profiler->action_flags.jit_time = TRUE;
+                               profiler->action_flags.jit_time = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "allocations") && strcmp (argument, "alloc") && strcmp (argument, "a"))) {
-                               profiler->flags |= MONO_PROFILE_ALLOCATIONS|MONO_PROFILE_GC;
+                               FAIL_IF_HAS_MINUS;
+                               if (! has_plus) {
+                                       profiler->action_flags.save_allocation_caller = TRUE;
+                                       profiler->action_flags.save_allocation_stack = TRUE;
+                               }
+                               if (! has_minus) {
+                                       profiler->flags |= MONO_PROFILE_ALLOCATIONS;
+                               } else {
+                                       profiler->flags &= ~MONO_PROFILE_ALLOCATIONS;
+                               }
                        } else if (! (strcmp (argument, "gc") && strcmp (argument, "g"))) {
+                               FAIL_IF_HAS_MINUS;
                                profiler->flags |= MONO_PROFILE_GC;
+                       } else if (! (strcmp (argument, "allocations-summary") && strcmp (argument, "as"))) {
+                               profiler->action_flags.collection_summary = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "heap-shot") && strcmp (argument, "heap") && strcmp (argument, "h"))) {
-                               profiler->flags |= MONO_PROFILE_ALLOCATIONS|MONO_PROFILE_GC;
-                               profiler->action_flags.unreachable_objects = TRUE;
-                               profiler->action_flags.heap_shot = TRUE;
+                               FAIL_IF_HAS_MINUS;
+                               if (! has_plus) {
+                                       profiler->action_flags.save_allocation_caller = TRUE;
+                                       profiler->action_flags.save_allocation_stack = TRUE;
+                                       profiler->action_flags.allocations_carry_id = TRUE_IF_NOT_MINUS;
+                               }
+                               profiler->action_flags.heap_shot = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "unreachable") && strcmp (argument, "free") && strcmp (argument, "f"))) {
-                               profiler->flags |= MONO_PROFILE_ALLOCATIONS|MONO_PROFILE_GC;
-                               profiler->action_flags.unreachable_objects = TRUE;
+                               profiler->action_flags.unreachable_objects = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "threads") && strcmp (argument, "t"))) {
-                               profiler->flags |= MONO_PROFILE_THREADS;
+                               if (! has_minus) {
+                                       profiler->flags |= MONO_PROFILE_THREADS;
+                               } else {
+                                       profiler->flags &= ~MONO_PROFILE_THREADS;
+                               }
                        } else if (! (strcmp (argument, "enter-leave") && strcmp (argument, "calls") && strcmp (argument, "c"))) {
-                               profiler->flags |= MONO_PROFILE_ENTER_LEAVE;
+                               profiler->action_flags.track_calls = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "statistical") && strcmp (argument, "stat") && strcmp (argument, "s"))) {
-                               profiler->flags |= MONO_PROFILE_STATISTICAL|MONO_PROFILE_JIT_COMPILATION;
-                               profiler->action_flags.jit_time = TRUE;
+                               if (! has_minus) {
+                                       profiler->flags |= MONO_PROFILE_STATISTICAL;
+                               } else {
+                                       profiler->flags &= ~MONO_PROFILE_STATISTICAL;
+                               }
+                       } else if (! (strcmp (argument, "save-allocation-caller") && strcmp (argument, "sac"))) {
+                               profiler->action_flags.save_allocation_caller = TRUE_IF_NOT_MINUS;
+                       } else if (! (strcmp (argument, "save-allocation-stack") && strcmp (argument, "sas"))) {
+                               profiler->action_flags.save_allocation_stack = TRUE_IF_NOT_MINUS;
+                       } else if (! (strcmp (argument, "allocations-carry-id") && strcmp (argument, "aci"))) {
+                               profiler->action_flags.allocations_carry_id = TRUE_IF_NOT_MINUS;
+                       } else if (! (strcmp (argument, "start-enabled") && strcmp (argument, "se"))) {
+                               profiler->profiler_enabled = TRUE_IF_NOT_MINUS;
+                       } else if (! (strcmp (argument, "start-disabled") && strcmp (argument, "sd"))) {
+                               profiler->profiler_enabled = ! TRUE_IF_NOT_MINUS; /* "start-disabled" turns the profiler off; "-start-disabled" leaves it on */
+                       } else if (! (strcmp (argument, "force-accurate-timer") && strcmp (argument, "fac"))) {
+                               use_fast_timer = TRUE_IF_NOT_MINUS; /* NOTE(review): the option name suggests this should clear use_fast_timer rather than set it - confirm against how use_fast_timer is consumed */
 #if (HAS_OPROFILE)
                        } else if (! (strcmp (argument, "oprofile") && strcmp (argument, "oprof"))) {
                                profiler->flags |= MONO_PROFILE_JIT_COMPILATION;
                                profiler->action_flags.oprofile = TRUE;
                                if (op_open_agent ()) {
-                                       g_warning ("Problem calling op_open_agent\n");
+                                       FAIL_ARGUMENT_CHECK ("problem calling op_open_agent");
                                }
 #endif
                        } else if (strcmp (argument, "logging")) {
-                               g_warning ("Cannot parse flag argument %s\n", argument);
+                               FAIL_PARSING_FLAG_ARGUMENT;
                        }
                }
+               
+failure_handling:
+               if (failure_message != NULL) {
+                       g_warning (failure_message, argument);
+                       failure_message = NULL;
+               }
        }
        
        g_free (arguments_array);
        
 #ifndef PLATFORM_WIN32
        if (gc_request_signal_number != 0) {
-               if (((gc_request_signal_number == SIGPROF) && ! (profiler->flags & MONO_PROFILE_STATISTICAL)) ||
-                               (gc_request_signal_number == SIGUSR1) ||
-                               (gc_request_signal_number == SIGUSR2)) {
+               if (check_signal_number (gc_request_signal_number) && (gc_request_signal_number != toggle_signal_number)) {
                        add_gc_request_handler (gc_request_signal_number);
                } else {
                        g_error ("Cannot use signal %d", gc_request_signal_number);
                }
        }
+       if (toggle_signal_number != 0) { /* A toggle signal was given through the toggle-signal argument */
+               if (check_signal_number (toggle_signal_number) && (toggle_signal_number != gc_request_signal_number)) {
+                       add_toggle_handler (toggle_signal_number);
+               } else {
+                       g_error ("Cannot use signal %d", toggle_signal_number); /* Report the toggle signal, which is the one being rejected here */
+               }
+       }
 #endif
        
+       /* Ensure that the profiler flags needed to support required action flags are active */
+       if (profiler->action_flags.jit_time) {
+               profiler->flags |= MONO_PROFILE_JIT_COMPILATION;
+       }
+       if (profiler->action_flags.save_allocation_caller || profiler->action_flags.save_allocation_stack || profiler->action_flags.allocations_carry_id) {
+               profiler->flags |= MONO_PROFILE_ALLOCATIONS;
+       }
+       if (profiler->action_flags.collection_summary || profiler->action_flags.heap_shot || profiler->action_flags.unreachable_objects) {
+               profiler->flags |= MONO_PROFILE_ALLOCATIONS;
+       }
+       if (profiler->action_flags.track_calls) {
+               profiler->flags |= MONO_PROFILE_ENTER_LEAVE;
+               profiler->action_flags.jit_time = TRUE;
+       }
+       if (profiler->action_flags.save_allocation_caller || profiler->action_flags.save_allocation_stack) {
+               profiler->action_flags.track_stack = TRUE;
+               profiler->flags |= MONO_PROFILE_ENTER_LEAVE;
+       }
+       
+       /* Without JIT events the stat profiler will not find method IDs... */
+       if (profiler->flags & MONO_PROFILE_STATISTICAL) {
+               profiler->flags |= MONO_PROFILE_JIT_COMPILATION;
+       }
+       /* Profiling allocations without knowing which gc we are doing is not nice... */
+       if (profiler->flags & MONO_PROFILE_ALLOCATIONS) {
+               profiler->flags |= MONO_PROFILE_GC;
+       }
+
+       
        if (profiler->file_name == NULL) {
                char *program_name = g_get_prgname ();
                
@@ -3425,6 +5183,27 @@ data_writer_thread (gpointer nothing) {
        static gboolean thread_detached = FALSE;
        static MonoThread *this_thread = NULL;
        
+       /* Wait for the OK to attach to the runtime */
+       WRITER_EVENT_ENABLE_WAIT ();
+       if (! profiler->terminate_writer_thread) {
+               MonoDomain * root_domain = mono_get_root_domain ();
+               if (root_domain != NULL) {
+                       LOG_WRITER_THREAD ("data_writer_thread: attaching thread");
+                       this_thread = mono_thread_attach (root_domain);
+                       mono_thread_set_manage_callback (this_thread, thread_detach_callback);
+                       thread_attached = TRUE;
+               } else {
+                       g_error ("Cannot get root domain\n");
+               }
+       } else {
+               /* Execution was too short, pretend we attached and detached. */
+               thread_attached = TRUE;
+               thread_detached = TRUE;
+       }
+       profiler->writer_thread_enabled = TRUE;
+       /* Notify that we are attached to the runtime */
+       WRITER_EVENT_DONE_RAISE ();
+       
        for (;;) {
                ProfilerStatisticalData *statistical_data;
                gboolean done;
@@ -3433,24 +5212,6 @@ data_writer_thread (gpointer nothing) {
                WRITER_EVENT_WAIT ();
                LOG_WRITER_THREAD ("data_writer_thread: just woke up");
                
-               if (! thread_attached) {
-                       if (! profiler->terminate_writer_thread) {
-                               MonoDomain * root_domain = mono_get_root_domain ();
-                               if (root_domain != NULL) {
-                                       LOG_WRITER_THREAD ("data_writer_thread: attaching thread");
-                                       this_thread = mono_thread_attach (root_domain);
-                                       mono_thread_set_manage_callback (this_thread, thread_detach_callback);
-                                       thread_attached = TRUE;
-                               } else {
-                                       g_error ("Cannot get root domain\n");
-                               }
-                       } else {
-                               /* Execution was too short, pretend we attached and detached. */
-                               thread_attached = TRUE;
-                               thread_detached = TRUE;
-                       }
-               }
-               
                if (profiler->heap_shot_was_signalled) {
                        LOG_WRITER_THREAD ("data_writer_thread: starting requested collection");
                        mono_gc_collect (mono_gc_max_generation ());
@@ -3458,35 +5219,63 @@ data_writer_thread (gpointer nothing) {
                }
                
                statistical_data = profiler->statistical_data_ready;
-               done = (statistical_data == NULL) && (profiler->heap_shot_write_jobs == NULL);
+               done = (statistical_data == NULL) && (profiler->heap_shot_write_jobs == NULL) && (profiler->writer_thread_flush_everything == FALSE);
                
-               if (!done) {
-                       LOG_WRITER_THREAD ("data_writer_thread: acquiring lock and writing data");
-                       LOCK_PROFILER ();
-                       
-                       // This makes sure that all method ids are in place
-                       LOG_WRITER_THREAD ("data_writer_thread: writing mapping...");
-                       flush_all_mappings ();
-                       LOG_WRITER_THREAD ("data_writer_thread: wrote mapping");
-                       
-                       if ((statistical_data != NULL) && ! thread_detached) {
-                               LOG_WRITER_THREAD ("data_writer_thread: writing statistical data...");
-                               profiler->statistical_data_ready = NULL;
-                               write_statistical_data_block (statistical_data);
-                               statistical_data->next_free_index = 0;
-                               statistical_data->first_unwritten_index = 0;
-                               profiler->statistical_data_second_buffer = statistical_data;
-                               LOG_WRITER_THREAD ("data_writer_thread: wrote statistical data");
+               if ((!done) && thread_attached) {
+                       if (profiler->writer_thread_flush_everything) {
+                               /* Note that this assumes the lock is held by the thread that woke us up! */
+                               if (! thread_detached) {
+                                       LOG_WRITER_THREAD ("data_writer_thread: flushing everything...");
+                                       flush_everything ();
+                                       profiler->writer_thread_flush_everything = FALSE;
+                                       WRITER_EVENT_DONE_RAISE ();
+                                       LOG_WRITER_THREAD ("data_writer_thread: flushed everything.");
+                               } else {
+                                       LOG_WRITER_THREAD ("data_writer_thread: flushing requested, but thread is detached...");
+                                       profiler->writer_thread_flush_everything = FALSE;
+                                       WRITER_EVENT_DONE_RAISE ();
+                                       LOG_WRITER_THREAD ("data_writer_thread: done event raised.");
+                               }
+                       } else {
+                               LOG_WRITER_THREAD ("data_writer_thread: acquiring lock and writing data");
+                               LOCK_PROFILER ();
+                               
+                               // This makes sure that all method ids are in place
+                               LOG_WRITER_THREAD ("data_writer_thread: writing mapping...");
+                               flush_all_mappings ();
+                               LOG_WRITER_THREAD ("data_writer_thread: wrote mapping");
+                               
+                               if ((statistical_data != NULL) && ! thread_detached) {
+                                       LOG_WRITER_THREAD ("data_writer_thread: writing statistical data...");
+                                       profiler->statistical_data_ready = NULL;
+                                       write_statistical_data_block (statistical_data);
+                                       statistical_data->next_free_index = 0;
+                                       statistical_data->first_unwritten_index = 0;
+                                       profiler->statistical_data_second_buffer = statistical_data;
+                                       LOG_WRITER_THREAD ("data_writer_thread: wrote statistical data");
+                               }
+                               
+                               profiler_process_heap_shot_write_jobs ();
+                               
+                               UNLOCK_PROFILER ();
+                               LOG_WRITER_THREAD ("data_writer_thread: wrote data and released lock");
+                       }
+               } else {
+                       if (profiler->writer_thread_flush_everything) {
+                               LOG_WRITER_THREAD ("data_writer_thread: flushing requested, but thread is not attached...");
+                               profiler->writer_thread_flush_everything = FALSE;
+                               WRITER_EVENT_DONE_RAISE ();
+                               LOG_WRITER_THREAD ("data_writer_thread: done event raised.");
                        }
-                       
-                       profiler_process_heap_shot_write_jobs ();
-                       
-                       UNLOCK_PROFILER ();
-                       LOG_WRITER_THREAD ("data_writer_thread: wrote data and released lock");
                }
                
                if (profiler->detach_writer_thread) {
                        if (this_thread != NULL) {
+                               LOG_WRITER_THREAD ("data_writer_thread: detach requested, acquiring lock and flushing data");
+                               LOCK_PROFILER ();
+                               flush_everything ();
+                               UNLOCK_PROFILER ();
+                               LOG_WRITER_THREAD ("data_writer_thread: flushed data and released lock");
                                LOG_WRITER_THREAD ("data_writer_thread: detaching thread");
                                mono_thread_detach (this_thread);
                                this_thread = NULL;
@@ -3499,6 +5288,7 @@ data_writer_thread (gpointer nothing) {
                
                if (profiler->terminate_writer_thread) {
                LOG_WRITER_THREAD ("data_writer_thread: exiting thread");
+                       CLEANUP_WRITER_THREAD ();
                        EXIT_THREAD ();
                }
        }
@@ -3514,11 +5304,12 @@ mono_profiler_startup (const char *desc)
 {
        profiler = g_new0 (MonoProfiler, 1);
        
-       setup_user_options ((desc != NULL) ? desc : "");
+       setup_user_options ((desc != NULL) ? desc : DEFAULT_ARGUMENTS);
        
        INITIALIZE_PROFILER_MUTEX ();
        MONO_PROFILER_GET_CURRENT_TIME (profiler->start_time);
        MONO_PROFILER_GET_CURRENT_COUNTER (profiler->start_counter);
+       profiler->last_header_counter = 0;
        
        profiler->methods = method_id_mapping_new ();
        profiler->classes = class_id_mapping_new ();
@@ -3526,9 +5317,8 @@ mono_profiler_startup (const char *desc)
        profiler->loaded_modules = g_hash_table_new_full (g_direct_hash, NULL, NULL, loaded_element_destroy);
        profiler->loaded_appdomains = g_hash_table_new_full (g_direct_hash, NULL, NULL, loaded_element_destroy);
        
-       profiler->statistical_data = profiler_statistical_data_new (profiler->statistical_buffer_size);
-       profiler->statistical_data_second_buffer = profiler_statistical_data_new (profiler->statistical_buffer_size);
-       unmanaged_functions_init (&(profiler->unmanaged_functions));
+       profiler->statistical_data = profiler_statistical_data_new (profiler);
+       profiler->statistical_data_second_buffer = profiler_statistical_data_new (profiler);
        
        profiler->write_buffers = g_malloc (sizeof (ProfilerFileWriteBuffer) + PROFILER_FILE_WRITE_BUFFER_SIZE);
        profiler->write_buffers->next = NULL;
@@ -3536,14 +5326,18 @@ mono_profiler_startup (const char *desc)
        profiler->current_write_position = 0;
        profiler->full_write_buffers = 0;
        
-       profiler->executable_regions = profiler_executable_memory_regions_new (1);
+       profiler->executable_regions = profiler_executable_memory_regions_new (1, 1);
+       
+       profiler->executable_files.table = g_hash_table_new (g_str_hash, g_str_equal); 
+       profiler->executable_files.new_files = NULL; 
        
        profiler->heap_shot_write_jobs = NULL;
-       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot) {
+       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot || profiler->action_flags.collection_summary) {
                profiler_heap_buffers_setup (&(profiler->heap));
        } else {
                profiler_heap_buffers_clear (&(profiler->heap));
        }
+       profiler->garbage_collection_counter = 0;
        
        WRITER_EVENT_INIT ();
        LOG_WRITER_THREAD ("mono_profiler_startup: creating writer thread");
@@ -3555,6 +5349,7 @@ mono_profiler_startup (const char *desc)
        OPEN_FILE ();
        
        write_intro_block ();
+       write_directives_block (TRUE);
        
        mono_profiler_install (profiler, profiler_shutdown);
        
@@ -3572,7 +5367,9 @@ mono_profiler_startup (const char *desc)
        mono_profiler_install_thread (thread_start, thread_end);
        mono_profiler_install_allocation (object_allocated);
        mono_profiler_install_statistical (statistical_hit);
+       mono_profiler_install_statistical_call_chain (statistical_call_chain, profiler->statistical_call_chain_depth);
        mono_profiler_install_gc (gc_event, gc_resize);
+       mono_profiler_install_runtime_initialized (runtime_initialized);
 #if (HAS_OPROFILE)
        mono_profiler_install_jit_end (method_jit_result);
 #endif