2009-02-17 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / profiler / mono-profiler-logging.c
index 2c3cb4bcea108ee941c192c155865cace40447a9..7aa8c5f6b2b31df1e5b300a3ef6aaa48b4ba3d94 100644 (file)
@@ -1,6 +1,7 @@
 #include <config.h>
 #include <mono/metadata/profiler.h>
 #include <mono/metadata/class.h>
+#include <mono/metadata/class-internals.h>
 #include <mono/metadata/assembly.h>
 #include <mono/metadata/loader.h>
 #include <mono/metadata/threads.h>
@@ -33,9 +34,19 @@ typedef enum {
        MONO_PROFILER_FILE_BLOCK_KIND_EVENTS = 6,
        MONO_PROFILER_FILE_BLOCK_KIND_STATISTICAL = 7,
        MONO_PROFILER_FILE_BLOCK_KIND_HEAP_DATA = 8,
-       MONO_PROFILER_FILE_BLOCK_KIND_HEAP_SUMMARY = 9
+       MONO_PROFILER_FILE_BLOCK_KIND_HEAP_SUMMARY = 9,
+       MONO_PROFILER_FILE_BLOCK_KIND_DIRECTIVES = 10
 } MonoProfilerFileBlockKind;
 
+typedef enum { /* Directive codes; write_directives_block () emits them as uint32 values. */
+       MONO_PROFILER_DIRECTIVE_END = 0, /* Always written last, closing the directive list. */
+       MONO_PROFILER_DIRECTIVE_ALLOCATIONS_CARRY_CALLER = 1, /* Emitted when action_flags.save_allocation_caller is set. */
+       MONO_PROFILER_DIRECTIVE_ALLOCATIONS_HAVE_STACK = 2, /* Emitted when save_allocation_stack or track_calls is set. */
+       MONO_PROFILER_DIRECTIVE_ALLOCATIONS_CARRY_ID = 3, /* Emitted when action_flags.allocations_carry_id is set. */
+       MONO_PROFILER_DIRECTIVE_LAST /* One past the highest directive code; presumably a count/sentinel -- confirm. */
+} MonoProfilerDirectives;
+
+
 #define MONO_PROFILER_LOADED_EVENT_MODULE     1
 #define MONO_PROFILER_LOADED_EVENT_ASSEMBLY   2
 #define MONO_PROFILER_LOADED_EVENT_APPDOMAIN  4
@@ -54,18 +65,20 @@ typedef struct _ProfilerEventData {
                gsize number;
        } data;
        unsigned int data_type:2;
-       unsigned int code:3;
+       unsigned int code:4;
        unsigned int kind:1;
-       unsigned int value:26;
+       unsigned int value:25;
 } ProfilerEventData;
 
-#define EVENT_VALUE_BITS (26)
+#define EVENT_VALUE_BITS (25)
 #define MAX_EVENT_VALUE ((1<<EVENT_VALUE_BITS)-1)
 
 typedef enum {
        MONO_PROFILER_EVENT_METHOD_JIT = 0,
        MONO_PROFILER_EVENT_METHOD_FREED = 1,
-       MONO_PROFILER_EVENT_METHOD_CALL = 2
+       MONO_PROFILER_EVENT_METHOD_CALL = 2,
+       MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER = 3,
+       MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER = 4
 } MonoProfilerMethodEvents;
 typedef enum {
        MONO_PROFILER_EVENT_CLASS_LOAD = 0,
@@ -85,7 +98,10 @@ typedef enum {
        MONO_PROFILER_EVENT_GC_SWEEP = 4,
        MONO_PROFILER_EVENT_GC_RESIZE = 5,
        MONO_PROFILER_EVENT_GC_STOP_WORLD = 6,
-       MONO_PROFILER_EVENT_GC_START_WORLD = 7
+       MONO_PROFILER_EVENT_GC_START_WORLD = 7,
+       MONO_PROFILER_EVENT_JIT_TIME_ALLOCATION = 8,
+       MONO_PROFILER_EVENT_STACK_SECTION = 9,
+       MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID = 10
 } MonoProfilerEvents;
 typedef enum {
        MONO_PROFILER_EVENT_KIND_START = 0,
@@ -217,7 +233,7 @@ static __inline__ guint64 rdtsc(void) {
        }\
 } while (0)
 #else
-static detect_fast_timer (void) {
+static void detect_fast_timer (void) {
        use_fast_timer = FALSE;
 }
 #define MONO_PROFILER_GET_CURRENT_COUNTER(c) MONO_PROFILER_GET_CURRENT_TIME ((c))
@@ -356,6 +372,16 @@ typedef struct _ProfilerHeapShotWriteJob {
        gboolean dump_heap_data;
 } ProfilerHeapShotWriteJob;
 
+typedef struct _ProfilerThreadStack { /* Stack of MonoMethod frames embedded in ProfilerPerThreadData. */
+       guint32 capacity; /* Number of slots allocated in each of the three arrays below. */
+       guint32 top; /* Index of the next free slot, i.e. the current depth. */
+       guint32 last_saved_top; /* Lowered by thread_stack_pop () whenever it exceeds top. */
+       guint32 last_written_frame; /* Set by write_stack_section_event () to last_saved_frame + saved_frames. */
+       MonoMethod **stack; /* Method of each frame; NULL when the stack was initialized empty. */
+       guint8 *method_is_jitted; /* Per-frame flag supplied to thread_stack_push_jitted (). */
+       guint32 *written_frames; /* Per-frame value stored by thread_stack_write_frame_at_index (). */
+} ProfilerThreadStack;
+
 typedef struct _ProfilerPerThreadData {
        ProfilerEventData *events;
        ProfilerEventData *next_free_event;
@@ -366,14 +392,20 @@ typedef struct _ProfilerPerThreadData {
        guint64 last_event_counter;
        gsize thread_id;
        ProfilerHeapShotObjectBuffer *heap_shot_object_buffers;
+       ProfilerThreadStack stack;
        struct _ProfilerPerThreadData* next;
 } ProfilerPerThreadData;
 
+typedef struct _ProfilerStatisticalHit { /* Element type of ProfilerStatisticalData.hits. */
+       gpointer *address; /* NOTE(review): presumably the sampled code address -- confirm against the code that records hits. */
+       MonoDomain *domain; /* NOTE(review): presumably the domain active when the hit was taken -- confirm. */
+} ProfilerStatisticalHit;
+
 typedef struct _ProfilerStatisticalData {
-       gpointer *addresses;
-       int next_free_index;
-       int end_index;
-       int first_unwritten_index;
+       ProfilerStatisticalHit *hits;
+       unsigned int next_free_index;
+       unsigned int end_index;
+       unsigned int first_unwritten_index;
 } ProfilerStatisticalData;
 
 typedef struct _ProfilerUnmanagedSymbol {
@@ -384,6 +416,7 @@ typedef struct _ProfilerUnmanagedSymbol {
 } ProfilerUnmanagedSymbol;
 
 struct _ProfilerExecutableFile;
+struct _ProfilerExecutableFileSectionRegion;
 
 typedef struct _ProfilerExecutableMemoryRegionData {
        gpointer start;
@@ -394,6 +427,7 @@ typedef struct _ProfilerExecutableMemoryRegionData {
        gboolean is_new;
        
        struct _ProfilerExecutableFile *file;
+       struct _ProfilerExecutableFileSectionRegion *file_region_reference;
        guint32 symbols_count;
        guint32 symbols_capacity;
        ProfilerUnmanagedSymbol *symbols;
@@ -582,6 +616,9 @@ typedef struct _ProfilerExecutableFiles {
 } ProfilerExecutableFiles;
 
 
+#define CLEANUP_WRITER_THREAD() do {profiler->writer_thread_terminated = TRUE;} while (0) /* Records that the writer thread has terminated. */
+#define CHECK_WRITER_THREAD() (! profiler->writer_thread_terminated) /* Non-zero while writer_thread_terminated is still FALSE. */
+
 #ifndef PLATFORM_WIN32
 #include <sys/types.h>
 #include <sys/time.h>
@@ -606,7 +643,11 @@ typedef struct _ProfilerExecutableFiles {
 #define THREAD_TYPE pthread_t
 #define CREATE_WRITER_THREAD(f) pthread_create (&(profiler->data_writer_thread), NULL, ((void*(*)(void*))f), NULL)
 #define EXIT_THREAD() pthread_exit (NULL);
-#define WAIT_WRITER_THREAD() pthread_join (profiler->data_writer_thread, NULL)
+#define WAIT_WRITER_THREAD() do { /* Joins the writer thread, but only while CHECK_WRITER_THREAD () holds. */ \
+       if (CHECK_WRITER_THREAD ()) {\
+               pthread_join (profiler->data_writer_thread, NULL);\
+       }\
+} while (0)
 #define CURRENT_THREAD_ID() (gsize) pthread_self ()
 
 #ifndef HAVE_KW_THREAD
@@ -623,10 +664,26 @@ make_pthread_profiler_key (void) {
 #endif
 
 #define EVENT_TYPE sem_t
-#define WRITER_EVENT_INIT() (void) sem_init (&(profiler->statistical_data_writer_event), 0, 0)
-#define WRITER_EVENT_DESTROY() (void) sem_destroy (&(profiler->statistical_data_writer_event))
-#define WRITER_EVENT_WAIT() (void) sem_wait (&(profiler->statistical_data_writer_event))
-#define WRITER_EVENT_RAISE() (void) sem_post (&(profiler->statistical_data_writer_event))
+#define WRITER_EVENT_INIT() do { /* Creates the enable, wake and done semaphores with an initial value of 0. */ \
+       sem_init (&(profiler->enable_data_writer_event), 0, 0);\
+       sem_init (&(profiler->wake_data_writer_event), 0, 0);\
+       sem_init (&(profiler->done_data_writer_event), 0, 0);\
+} while (0)
+#define WRITER_EVENT_DESTROY() do { /* Destroys the three semaphores set up by WRITER_EVENT_INIT (). */ \
+       sem_destroy (&(profiler->enable_data_writer_event));\
+       sem_destroy (&(profiler->wake_data_writer_event));\
+       sem_destroy (&(profiler->done_data_writer_event));\
+} while (0)
+#define WRITER_EVENT_WAIT() (void) sem_wait (&(profiler->wake_data_writer_event)) /* Blocks on the "wake" semaphore. */
+#define WRITER_EVENT_RAISE() (void) sem_post (&(profiler->wake_data_writer_event)) /* Posts the "wake" semaphore. */
+#define WRITER_EVENT_ENABLE_WAIT() (void) sem_wait (&(profiler->enable_data_writer_event)) /* Blocks on the "enable" semaphore. */
+#define WRITER_EVENT_ENABLE_RAISE() (void) sem_post (&(profiler->enable_data_writer_event)) /* Posts the "enable" semaphore. */
+#define WRITER_EVENT_DONE_WAIT() do { /* Blocks on the "done" semaphore, but only while CHECK_WRITER_THREAD () holds. */ \
+       if (CHECK_WRITER_THREAD ()) {\
+               (void) sem_wait (&(profiler->done_data_writer_event));\
+       }\
+} while (0)
+#define WRITER_EVENT_DONE_RAISE() (void) sem_post (&(profiler->done_data_writer_event)) /* Posts the "done" semaphore. */
 
 #if 0
 #define FILE_HANDLE_TYPE FILE*
@@ -655,7 +712,11 @@ make_pthread_profiler_key (void) {
 #define THREAD_TYPE HANDLE
 #define CREATE_WRITER_THREAD(f) CreateThread (NULL, (1*1024*1024), (f), NULL, 0, NULL);
 #define EXIT_THREAD() ExitThread (0);
-#define WAIT_WRITER_THREAD() WaitForSingleObject (profiler->data_writer_thread, INFINITE)
+#define WAIT_WRITER_THREAD() do {\
+       if (CHECK_WRITER_THREAD ()) {\
+                WaitForSingleObject (profiler->data_writer_thread, INFINITE);\
+       }\
+} while (0)
 #define CURRENT_THREAD_ID() (gsize) GetCurrentThreadId ()
 
 #ifndef HAVE_KW_THREAD
@@ -667,10 +728,27 @@ static guint32 profiler_thread_id = -1;
 #endif
 
 #define EVENT_TYPE HANDLE
-#define WRITER_EVENT_INIT() profiler->statistical_data_writer_event = CreateEvent (NULL, FALSE, FALSE, NULL)
+/* Creates the enable, wake and done events (auto-reset, initially non-signalled). */
+#define WRITER_EVENT_INIT() do {\
+       profiler->enable_data_writer_event = CreateEvent (NULL, FALSE, FALSE, NULL);\
+       profiler->wake_data_writer_event = CreateEvent (NULL, FALSE, FALSE, NULL);\
+       profiler->done_data_writer_event = CreateEvent (NULL, FALSE, FALSE, NULL);\
+} while (0)
-#define WRITER_EVENT_DESTROY() CloseHandle (profiler->statistical_data_writer_event)
-#define WRITER_EVENT_WAIT() WaitForSingleObject (profiler->statistical_data_writer_event, INFINITE)
-#define WRITER_EVENT_RAISE() SetEvent (profiler->statistical_data_writer_event)
+/*
+ * Closes the three event handles created by WRITER_EVENT_INIT ().
+ * This replaces the old single-handle definition, which referred to the
+ * statistical_data_writer_event field that no longer exists.
+ */
+#define WRITER_EVENT_DESTROY() do {\
+       CloseHandle (profiler->enable_data_writer_event);\
+       CloseHandle (profiler->wake_data_writer_event);\
+       CloseHandle (profiler->done_data_writer_event);\
+} while (0)
+#define WRITER_EVENT_WAIT() WaitForSingleObject (profiler->wake_data_writer_event, INFINITE)
+#define WRITER_EVENT_RAISE() SetEvent (profiler->wake_data_writer_event)
+#define WRITER_EVENT_ENABLE_WAIT() WaitForSingleObject (profiler->enable_data_writer_event, INFINITE)
+#define WRITER_EVENT_ENABLE_RAISE() SetEvent (profiler->enable_data_writer_event)
+#define WRITER_EVENT_DONE_WAIT() do {\
+       if (CHECK_WRITER_THREAD ()) {\
+               WaitForSingleObject (profiler->done_data_writer_event, INFINITE);\
+       }\
+} while (0)
+#define WRITER_EVENT_DONE_RAISE() SetEvent (profiler->done_data_writer_event)
 
 #define FILE_HANDLE_TYPE FILE*
 #define OPEN_FILE() profiler->file = fopen (profiler->file_name, "wb");
@@ -707,10 +785,15 @@ typedef struct _ProfilerFileWriteBuffer {
        guint8 buffer [];
 } ProfilerFileWriteBuffer;
 
+#define CHECK_PROFILER_ENABLED() do { /* Returns from the enclosing function (which must return void) when profiling is disabled. */ \
+       if (! profiler->profiler_enabled)\
+               return;\
+} while (0)
 struct _MonoProfiler {
        MUTEX_TYPE mutex;
        
        MonoProfileFlags flags;
+       gboolean profiler_enabled;
        char *file_name;
        char *file_name_suffix;
        FILE_HANDLE_TYPE file;
@@ -738,9 +821,14 @@ struct _MonoProfiler {
        int statistical_call_chain_depth;
        
        THREAD_TYPE data_writer_thread;
-       EVENT_TYPE statistical_data_writer_event;
+       EVENT_TYPE enable_data_writer_event;
+       EVENT_TYPE wake_data_writer_event;
+       EVENT_TYPE done_data_writer_event;
        gboolean terminate_writer_thread;
+       gboolean writer_thread_terminated;
        gboolean detach_writer_thread;
+       gboolean writer_thread_enabled;
+       gboolean writer_thread_flush_everything;
        
        ProfilerFileWriteBuffer *write_buffers;
        ProfilerFileWriteBuffer *current_write_buffer;
@@ -768,6 +856,11 @@ struct _MonoProfiler {
                gboolean unreachable_objects;
                gboolean collection_summary;
                gboolean heap_shot;
+               gboolean track_stack;
+               gboolean track_calls;
+               gboolean save_allocation_caller;
+               gboolean save_allocation_stack;
+               gboolean allocations_carry_id;
        } action_flags;
 };
 static MonoProfiler *profiler;
@@ -783,6 +876,12 @@ static MonoProfiler *profiler;
 #define SIG_HANDLER_SIGNATURE(ftn) ftn (int _dummy)
 #endif
 
+static void
+request_heap_snapshot (void) { /* Flags a heap shot request, then triggers a collection of generation mono_gc_max_generation (). */
+       profiler->heap_shot_was_signalled = TRUE; /* Same flag that gc_request_handler sets. */
+       mono_gc_collect (mono_gc_max_generation ());
+}
+
 static void
 SIG_HANDLER_SIGNATURE (gc_request_handler) {
        profiler->heap_shot_was_signalled = TRUE;
@@ -806,6 +905,45 @@ add_gc_request_handler (int signal_number)
        
        g_assert (sigaction (signal_number, &sa, NULL) != -1);
 }
+
+static void
+enable_profiler (void) { /* Sets profiler->profiler_enabled, the flag CHECK_PROFILER_ENABLED () tests. */
+       profiler->profiler_enabled = TRUE;
+}
+
+static void
+disable_profiler (void) { /* Clears profiler->profiler_enabled, the flag CHECK_PROFILER_ENABLED () tests. */
+       profiler->profiler_enabled = FALSE;
+}
+
+
+
+/* Signal handler: flips profiler->profiler_enabled between TRUE and FALSE. */
+static void
+SIG_HANDLER_SIGNATURE (toggle_handler) {
+       profiler->profiler_enabled = profiler->profiler_enabled ? FALSE : TRUE;
+}
+
+/*
+ * Installs toggle_handler as the handler for signal_number.
+ * The sigaction () call is made outside g_assert () so that the handler is
+ * still installed when assertions are compiled out (G_DISABLE_ASSERT); only
+ * the check of its result lives inside the assertion.
+ */
+static void
+add_toggle_handler (int signal_number)
+{
+       struct sigaction sa;
+       int result;
+
+       sigemptyset (&sa.sa_mask);
+#ifdef MONO_ARCH_USE_SIGACTION
+       sa.sa_sigaction = toggle_handler;
+       sa.sa_flags = SA_SIGINFO;
+#else
+       sa.sa_handler = toggle_handler;
+       sa.sa_flags = 0;
+#endif
+
+       result = sigaction (signal_number, &sa, NULL);
+       g_assert (result != -1);
+}
 #endif
 
 
@@ -817,7 +955,8 @@ add_gc_request_handler (int signal_number)
 #define DEBUG_CLASS_BITMAPS 0
 #define DEBUG_STATISTICAL_PROFILER 0
 #define DEBUG_WRITER_THREAD 0
-#if (DEBUG_LOGGING_PROFILER || DEBUG_STATISTICAL_PROFILER || DEBUG_HEAP_PROFILER || DEBUG_WRITER_THREAD)
+#define DEBUG_FILE_WRITES 0
+#if (DEBUG_LOGGING_PROFILER || DEBUG_STATISTICAL_PROFILER || DEBUG_HEAP_PROFILER || DEBUG_WRITER_THREAD || DEBUG_FILE_WRITES)
 #define LOG_WRITER_THREAD(m) printf ("WRITER-THREAD-LOG %s\n", m)
 #else
 #define LOG_WRITER_THREAD(m)
@@ -828,6 +967,166 @@ static int event_counter = 0;
 #define EVENT_MARK() printf ("[EVENT:%d]", ++ event_counter)
 #endif
 
+/*
+ * Puts STACK in the state used when stack tracking is off: no arrays
+ * allocated and every counter at zero.
+ */
+static void
+thread_stack_initialize_empty (ProfilerThreadStack *stack) {
+       stack->stack = NULL;
+       stack->method_is_jitted = NULL;
+       stack->written_frames = NULL;
+       stack->capacity = stack->top = stack->last_saved_top = stack->last_written_frame = 0;
+}
+
+/* Releases the three arrays owned by STACK and leaves it empty (NULL arrays, zero counters). */
+static void
+thread_stack_free (ProfilerThreadStack *stack) {
+       /* g_free () ignores NULL, so arrays that were never allocated need no guard. */
+       g_free (stack->stack);
+       g_free (stack->method_is_jitted);
+       g_free (stack->written_frames);
+       thread_stack_initialize_empty (stack);
+}
+
+/*
+ * Allocates zero-filled storage for CAPACITY frames and resets every counter
+ * of STACK. Arrays previously referenced by STACK are overwritten, not freed.
+ */
+static void
+thread_stack_initialize (ProfilerThreadStack *stack, guint32 capacity) {
+       stack->stack = g_new0 (MonoMethod*, capacity);
+       stack->method_is_jitted = g_new0 (guint8, capacity);
+       stack->written_frames = g_new0 (guint32, capacity);
+       stack->capacity = capacity;
+       stack->top = stack->last_saved_top = stack->last_written_frame = 0;
+}
+
+/*
+ * Pushes METHOD, together with its jitted flag, on STACK.
+ * When the three parallel arrays are full they are reallocated at twice the
+ * size and the existing frames are copied over. A stack whose capacity is
+ * zero (see thread_stack_initialize_empty) grows to 64 slots, the same
+ * initial size profiler_per_thread_data_new () uses, instead of "doubling"
+ * to zero and then storing past the end of an empty allocation.
+ */
+static void
+thread_stack_push_jitted (ProfilerThreadStack *stack, MonoMethod* method, gboolean method_is_jitted) {
+       if (stack->top >= stack->capacity) {
+               MonoMethod **old_stack = stack->stack;
+               guint8 *old_method_is_jitted = stack->method_is_jitted;
+               guint32 *old_written_frames = stack->written_frames;
+               guint32 top = stack->top;
+               guint32 last_saved_top = stack->last_saved_top;
+               guint32 last_written_frame = stack->last_written_frame;
+               guint32 new_capacity = (stack->capacity > 0) ? (stack->capacity * 2) : 64;
+
+               /* thread_stack_initialize () resets the counters, hence the copies saved above. */
+               thread_stack_initialize (stack, new_capacity);
+               if (top > 0) {
+                       /* Skipped for an empty stack: its old arrays may be NULL. */
+                       memcpy (stack->stack, old_stack, top * sizeof (MonoMethod*));
+                       memcpy (stack->method_is_jitted, old_method_is_jitted, top * sizeof (guint8));
+                       memcpy (stack->written_frames, old_written_frames, top * sizeof (guint32));
+               }
+               g_free (old_stack);
+               g_free (old_method_is_jitted);
+               g_free (old_written_frames);
+               stack->top = top;
+               stack->last_saved_top = last_saved_top;
+               stack->last_written_frame = last_written_frame;
+       }
+       stack->stack [stack->top] = method;
+       stack->method_is_jitted [stack->top] = method_is_jitted;
+       stack->top ++;
+}
+
+static inline void
+thread_stack_push (ProfilerThreadStack *stack, MonoMethod* method) { /* Pushes METHOD with its jitted flag set to FALSE. */
+       thread_stack_push_jitted (stack, method, FALSE);
+}
+
+/*
+ * Removes the top frame of STACK and returns its method, or NULL when the
+ * stack is empty. last_saved_top is pulled down so it never exceeds top.
+ */
+static MonoMethod*
+thread_stack_pop (ProfilerThreadStack *stack) {
+       if (stack->top == 0) {
+               return NULL;
+       }
+
+       stack->top --;
+       if (stack->last_saved_top > stack->top) {
+               stack->last_saved_top = stack->top;
+       }
+       return stack->stack [stack->top];
+}
+
+/* Returns the method of the top frame without popping it, or NULL when STACK is empty. */
+static MonoMethod*
+thread_stack_top (ProfilerThreadStack *stack) {
+       return (stack->top > 0) ? stack->stack [stack->top - 1] : NULL;
+}
+
+/* Returns the jitted flag of the top frame, or FALSE when STACK is empty. */
+static gboolean
+thread_stack_top_is_jitted (ProfilerThreadStack *stack) {
+       if (stack->top == 0) {
+               return FALSE;
+       }
+       return stack->method_is_jitted [stack->top - 1];
+}
+
+/*
+ * Returns the method INDEX frames below the top (0 is the top frame itself),
+ * or NULL when STACK does not hold that many frames.
+ */
+static MonoMethod*
+thread_stack_index_from_top (ProfilerThreadStack *stack, int index) {
+       if (stack->top <= index) {
+               return NULL;
+       }
+       return stack->stack [stack->top - (index + 1)];
+}
+
+/*
+ * Returns the jitted flag of the frame INDEX positions below the top (0 is
+ * the top frame itself), or FALSE when STACK does not hold that many frames.
+ */
+static gboolean
+thread_stack_index_from_top_is_jitted (ProfilerThreadStack *stack, int index) {
+       if (stack->top <= index) {
+               return FALSE;
+       }
+       return stack->method_is_jitted [stack->top - (index + 1)];
+}
+
+/* Like thread_stack_push (), but does nothing when STACK has no storage (stack->stack is NULL). */
+static inline void
+thread_stack_push_safely (ProfilerThreadStack *stack, MonoMethod* method) {
+       if (stack->stack == NULL) {
+               return;
+       }
+       thread_stack_push (stack, method);
+}
+
+/* Like thread_stack_push_jitted (), but does nothing when STACK has no storage (stack->stack is NULL). */
+static inline void
+thread_stack_push_jitted_safely (ProfilerThreadStack *stack, MonoMethod* method, gboolean method_is_jitted) {
+       if (stack->stack == NULL) {
+               return;
+       }
+       thread_stack_push_jitted (stack, method, method_is_jitted);
+}
+
+/* Returns top - last_saved_top, clamped so the result is never negative. */
+static inline int
+thread_stack_count_unsaved_frames (ProfilerThreadStack *stack) {
+       const int unsaved = stack->top - stack->last_saved_top;
+
+       if (unsaved <= 0) {
+               return 0;
+       }
+       return unsaved;
+}
+
+static inline int
+thread_stack_get_last_written_frame (ProfilerThreadStack *stack) { /* Reads stack->last_written_frame (guint32) as an int. */
+       return stack->last_written_frame;
+}
+
+static inline void
+thread_stack_set_last_written_frame (ProfilerThreadStack *stack, int last_written_frame) { /* Stores the value read back by thread_stack_get_last_written_frame (). */
+       stack->last_written_frame = last_written_frame;
+}
+
+static inline guint32
+thread_stack_written_frame_at_index (ProfilerThreadStack *stack, int index) { /* Reads written_frames [index]; INDEX is not range-checked here. */
+       return stack->written_frames [index];
+}
+
+static inline void
+thread_stack_write_frame_at_index (ProfilerThreadStack *stack, int index, guint32 method_id_and_is_jitted) { /* Stores written_frames [index]; INDEX is not range-checked here. */
+       stack->written_frames [index] = method_id_and_is_jitted;
+}
 
 static ClassIdMappingElement*
 class_id_mapping_element_get (MonoClass *klass) {
@@ -849,7 +1148,7 @@ static ClassIdMappingElement*
 class_id_mapping_element_new (MonoClass *klass) {
        ClassIdMappingElement *result = g_new (ClassIdMappingElement, 1);
        
-       result->name = g_strdup_printf ("%s.%s", mono_class_get_namespace (klass), mono_class_get_name (klass));
+       result->name = mono_type_full_name (mono_class_get_type (klass));
        result->klass = klass;
        result->next_unwritten = profiler->classes->unwritten;
        profiler->classes->unwritten = result;
@@ -1329,7 +1628,6 @@ profiler_heap_shot_write_job_new (gboolean heap_shot_was_signalled, gboolean dum
                job->end = & (job->buffers->buffer [PROFILER_HEAP_SHOT_WRITE_BUFFER_SIZE]);
        } else {
                job->buffers = NULL;
-               job->buffers->next = NULL;
                job->last_next = NULL;
                job->start = NULL;
                job->cursor = NULL;
@@ -1349,11 +1647,16 @@ profiler_heap_shot_write_job_new (gboolean heap_shot_was_signalled, gboolean dum
        job->collection = collection;
        job->dump_heap_data = dump_heap_data;
 #if DEBUG_HEAP_PROFILER
-       printf ("profiler_heap_shot_write_job_new: created job %p with buffer %p(%p-%p)\n", job, job->buffers, job->start, job->end);
+       printf ("profiler_heap_shot_write_job_new: created job %p with buffer %p(%p-%p) (collection %d, dump %d)\n", job, job->buffers, job->start, job->end, collection, dump_heap_data);
 #endif
        return job;
 }
 
+/* A job has data when it owns at least one buffer or its summary has a non-zero capacity. */
+static gboolean
+profiler_heap_shot_write_job_has_data (ProfilerHeapShotWriteJob *job) {
+       if (job->buffers != NULL) {
+               return TRUE;
+       }
+       return (job->summary.capacity > 0) ? TRUE : FALSE;
+}
+
 static void
 profiler_heap_shot_write_job_add_buffer (ProfilerHeapShotWriteJob *job, gpointer value) {
        ProfilerHeapShotWriteBuffer *buffer = g_new (ProfilerHeapShotWriteBuffer, 1);
@@ -1415,15 +1718,15 @@ profiler_process_heap_shot_write_jobs (void) {
                        next_job = current_job->next_unwritten;
                        
                        if (next_job != NULL) {
-                               if (current_job->buffers != NULL) {
+                               if (profiler_heap_shot_write_job_has_data (current_job)) {
                                        done = FALSE;
                                }
-                               if (next_job->buffers == NULL) {
+                               if (! profiler_heap_shot_write_job_has_data (next_job)) {
                                        current_job->next_unwritten = NULL;
                                        next_job = NULL;
                                }
                        } else {
-                               if (current_job->buffers != NULL) {
+                               if (profiler_heap_shot_write_job_has_data (current_job)) {
                                        LOG_WRITER_THREAD ("profiler_process_heap_shot_write_jobs: writing...");
                                        profiler_heap_shot_write_block (current_job);
                                        LOG_WRITER_THREAD ("profiler_process_heap_shot_write_jobs: done");
@@ -1530,6 +1833,11 @@ profiler_per_thread_data_new (guint32 buffer_size)
                        (profiler->action_flags.collection_summary == TRUE)) {
                profiler_heap_shot_object_buffer_new (data);
        }
+       if (profiler->action_flags.track_stack) {
+               thread_stack_initialize (&(data->stack), 64);
+       } else {
+               thread_stack_initialize_empty (&(data->stack));
+       }
        return data;
 }
 
@@ -1537,6 +1845,7 @@ static void
 profiler_per_thread_data_destroy (ProfilerPerThreadData *data) {
        g_free (data->events);
        profiler_heap_shot_object_buffers_destroy (data->heap_shot_object_buffers);
+       thread_stack_free (&(data->stack));
        g_free (data);
 }
 
@@ -1545,9 +1854,9 @@ profiler_statistical_data_new (MonoProfiler *profiler) {
        int buffer_size = profiler->statistical_buffer_size * (profiler->statistical_call_chain_depth + 1);
        ProfilerStatisticalData *data = g_new (ProfilerStatisticalData, 1);
 
-       data->addresses = g_new0 (gpointer, buffer_size);
+       data->hits = g_new0 (ProfilerStatisticalHit, buffer_size);
        data->next_free_index = 0;
-       data->end_index = buffer_size;
+       data->end_index = profiler->statistical_buffer_size;
        data->first_unwritten_index = 0;
        
        return data;
@@ -1555,7 +1864,7 @@ profiler_statistical_data_new (MonoProfiler *profiler) {
 
 static void
 profiler_statistical_data_destroy (ProfilerStatisticalData *data) {
-       g_free (data->addresses);
+       g_free (data->hits);
        g_free (data);
 }
 
@@ -1622,17 +1931,29 @@ write_current_block (guint16 code) {
        header [8] = (counter_delta >> 16) & 0xff;
        header [9] = (counter_delta >> 24) & 0xff;
        
+#if (DEBUG_FILE_WRITES)
+       printf ("write_current_block: writing header (code %d)\n", code);
+#endif
        WRITE_BUFFER (& (header [0]), 10);
        
        while ((current_buffer != NULL) && (profiler->full_write_buffers > 0)) {
+#if (DEBUG_FILE_WRITES)
+               printf ("write_current_block: writing buffer (size %d)\n", PROFILER_FILE_WRITE_BUFFER_SIZE);
+#endif
                WRITE_BUFFER (& (current_buffer->buffer [0]), PROFILER_FILE_WRITE_BUFFER_SIZE);
                profiler->full_write_buffers --;
                current_buffer = current_buffer->next;
        }
        if (profiler->current_write_position > 0) {
+#if (DEBUG_FILE_WRITES)
+               printf ("write_current_block: writing last buffer (size %d)\n", profiler->current_write_position);
+#endif
                WRITE_BUFFER (& (current_buffer->buffer [0]), profiler->current_write_position);
        }
        FLUSH_FILE ();
+#if (DEBUG_FILE_WRITES)
+       printf ("write_current_block: buffers flushed\n");
+#endif
        
        profiler->current_write_buffer = profiler->write_buffers;
        profiler->current_write_position = 0;
@@ -1668,6 +1989,28 @@ write_string (const char *string) {
        WRITE_BYTE (0);
 }
 
+static void write_clock_data (void);
+/*
+ * Writes a DIRECTIVES block: clock data, the list of directive codes
+ * terminated by MONO_PROFILER_DIRECTIVE_END, then clock data again.
+ * The directives reflecting profiler->action_flags are only listed when
+ * START is TRUE; otherwise the list holds just the END marker.
+ */
+static void
+write_directives_block (gboolean start) {
+       write_clock_data ();
+
+       if (start && profiler->action_flags.save_allocation_caller) {
+               write_uint32 (MONO_PROFILER_DIRECTIVE_ALLOCATIONS_CARRY_CALLER);
+       }
+       if (start && (profiler->action_flags.save_allocation_stack || profiler->action_flags.track_calls)) {
+               write_uint32 (MONO_PROFILER_DIRECTIVE_ALLOCATIONS_HAVE_STACK);
+       }
+       if (start && profiler->action_flags.allocations_carry_id) {
+               write_uint32 (MONO_PROFILER_DIRECTIVE_ALLOCATIONS_CARRY_ID);
+       }
+       write_uint32 (MONO_PROFILER_DIRECTIVE_END);
+
+       write_clock_data ();
+       write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_DIRECTIVES);
+}
+
 #if DEBUG_HEAP_PROFILER
 #define WRITE_HEAP_SHOT_JOB_VALUE_MESSAGE(v,c) printf ("WRITE_HEAP_SHOT_JOB_VALUE: writing value %p at cursor %p\n", (v), (c))
 #else
@@ -1741,7 +2084,7 @@ profiler_heap_shot_write_data_block (ProfilerHeapShotWriteJob *job) {
        write_uint64 (start_counter);
        write_uint64 (start_time);
 #if DEBUG_HEAP_PROFILER
-       printf ("profiler_heap_shot_write_data_block: start writing job %p...\n", job);
+       printf ("profiler_heap_shot_write_data_block: start writing job %p (start %p, end %p)...\n", job, & (job->buffers->buffer [0]), job->cursor);
 #endif
        buffer = job->buffers;
        cursor = & (buffer->buffer [0]);
@@ -1934,7 +2277,7 @@ write_mapping_block (gsize thread_id) {
        if ((profiler->classes->unwritten == NULL) && (profiler->methods->unwritten == NULL))
                return;
        
-#if (DEBUG_MAPPING_EVENTS)
+#if (DEBUG_MAPPING_EVENTS || DEBUG_FILE_WRITES)
        printf ("[write_mapping_block][TID %ld] START\n", thread_id);
 #endif
        
@@ -1973,7 +2316,7 @@ write_mapping_block (gsize thread_id) {
        write_clock_data ();
        write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_MAPPING);
        
-#if (DEBUG_MAPPING_EVENTS)
+#if (DEBUG_MAPPING_EVENTS || DEBUG_FILE_WRITES)
        printf ("[write_mapping_block][TID %ld] END\n", thread_id);
 #endif
 }
@@ -1999,13 +2342,74 @@ typedef enum {
        result = ((base)|((((kind)<<4) | (code)) << MONO_PROFILER_PACKED_EVENT_CODE_BITS));\
 } while (0)
 
+/*
+ * Emits a STACK_SECTION event that repeats every frame already recorded in
+ * STACK's written_frames: a header of 0 followed by the frame count, then the
+ * cached frame values from the highest index down to index 0.
+ */
+static void
+rewrite_last_written_stack (ProfilerThreadStack *stack) {
+       int frame_count = thread_stack_get_last_written_frame (stack);
+       guint8 event_code;
+       int remaining;
+
+       MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, MONO_PROFILER_EVENT_STACK_SECTION, 0, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+       WRITE_BYTE (event_code);
+       write_uint32 (0);
+       write_uint32 (frame_count);
+
+       for (remaining = frame_count; remaining > 0; remaining--) {
+               write_uint32 (thread_stack_written_frame_at_index (stack, remaining - 1));
+       }
+}
+
+
+/*
+ * Writes a STACK_SECTION event. The header event at EVENTS supplies the last
+ * saved frame index (data.number) and the number of frame events that follow
+ * it (value). Each following event names one frame's method and, through its
+ * code, whether the frame is a JIT-time caller.
+ * Every frame is written as (method id << 1) | jit flag and is also stored in
+ * DATA's stack, where rewrite_last_written_stack () reads it back.
+ * Returns the first event after the section.
+ */
+static ProfilerEventData*
+write_stack_section_event (ProfilerEventData *events, ProfilerPerThreadData *data) {
+       ProfilerThreadStack *stack = &(data->stack);
+       int first_frame = events->data.number;
+       int frame_count = events->value;
+       ProfilerEventData *frame_event = events + 1;
+       guint8 event_code;
+       int written;
+
+       MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, MONO_PROFILER_EVENT_STACK_SECTION, 0, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+       WRITE_BYTE (event_code);
+       write_uint32 (first_frame);
+       write_uint32 (frame_count);
+       thread_stack_set_last_written_frame (stack, first_frame + frame_count);
+
+       for (written = 0; written < frame_count; written++, frame_event++) {
+               MethodIdMappingElement *method;
+               guint32 jit_flag = 0;
+               guint32 frame_value;
+
+               switch (frame_event->code) {
+               case MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER:
+                       break;
+               case MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER:
+                       jit_flag = 1;
+                       break;
+               default:
+                       /* Only caller events may follow a stack section header. */
+                       g_assert_not_reached ();
+                       break;
+               }
+
+               method = method_id_mapping_element_get (frame_event->data.address);
+               g_assert (method != NULL);
+               frame_value = (method->id << 1) | jit_flag;
+               write_uint32 (frame_value);
+               /* Events arrive top-most frame first, so they fill the cache from the highest index down. */
+               thread_stack_write_frame_at_index (stack, first_frame + frame_count - (1 + written), frame_value);
+       }
+
+       return frame_event;
+}
+
 static ProfilerEventData*
-write_event (ProfilerEventData *event) {
+write_event (ProfilerEventData *event, ProfilerPerThreadData *data) {
        ProfilerEventData *next = event + 1;
        gboolean write_event_value = TRUE;
        guint8 event_code;
        guint64 event_data;
        guint64 event_value;
+       gboolean write_event_value_extension_1 = FALSE;
+       guint64 event_value_extension_1 = 0;
+       gboolean write_event_value_extension_2 = FALSE;
+       guint64 event_value_extension_2 = 0;
 
        event_value = event->value;
        if (event_value == MAX_EVENT_VALUE) {
@@ -2033,13 +2437,54 @@ write_event (ProfilerEventData *event) {
                event_data = element->id;
                
                if (event->code == MONO_PROFILER_EVENT_CLASS_ALLOCATION) {
-                       MONO_PROFILER_EVENT_MAKE_PACKED_CODE (event_code, event_data, MONO_PROFILER_PACKED_EVENT_CODE_CLASS_ALLOCATION);
+                       if ((! profiler->action_flags.save_allocation_caller) || (! (next->code == MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER))) {
+                               MONO_PROFILER_EVENT_MAKE_PACKED_CODE (event_code, event_data, MONO_PROFILER_PACKED_EVENT_CODE_CLASS_ALLOCATION);
+                       } else {
+                               MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, MONO_PROFILER_EVENT_JIT_TIME_ALLOCATION, event->kind, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+                       }
+                       
+                       if (profiler->action_flags.save_allocation_caller) {
+                               MonoMethod *caller_method = next->data.address;
+                               
+                               if ((next->code != MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER) && (next->code != MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER)) {
+                                       g_assert_not_reached ();
+                               }
+                               
+                               if (caller_method != NULL) {
+                                       MethodIdMappingElement *caller = method_id_mapping_element_get (caller_method);
+                                       g_assert (caller != NULL);
+                                       event_value_extension_1 = caller->id;
+                               }
+
+                               write_event_value_extension_1 = TRUE;
+                               next ++;
+                       }
+                       
+                       if (profiler->action_flags.allocations_carry_id) {
+                               event_value_extension_2  = GPOINTER_TO_UINT (next->data.address);
+                               
+                               if (next->code != MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID) {
+                                       g_assert_not_reached ();
+                               }
+                               
+                               write_event_value_extension_2 = TRUE;
+                               next ++;
+                       }
                } else {
                        MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, event->code, event->kind, MONO_PROFILER_PACKED_EVENT_CODE_CLASS_EVENT);
                }
        } else {
-               event_data = event->data.number;
-               MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, event->code, event->kind, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+               if (event->code == MONO_PROFILER_EVENT_STACK_SECTION) {
+                       return write_stack_section_event (event, data);
+               } else {
+                       event_data = event->data.number;
+                       MONO_PROFILER_EVENT_MAKE_FULL_CODE (event_code, event->code, event->kind, MONO_PROFILER_PACKED_EVENT_CODE_OTHER_EVENT);
+               }
+       }
+       
+       /* Skip writing JIT events if the user did not ask for them */
+       if ((event->code == MONO_PROFILER_EVENT_METHOD_JIT) && ! profiler->action_flags.jit_time) {
+               return next;
        }
        
 #if (DEBUG_LOGGING_PROFILER)
@@ -2053,6 +2498,12 @@ write_event (ProfilerEventData *event) {
        write_uint64 (event_data);
        if (write_event_value) {
                write_uint64 (event_value);
+               if (write_event_value_extension_1) {
+                       write_uint64 (event_value_extension_1);
+               }
+               if (write_event_value_extension_2) {
+                       write_uint64 (event_value_extension_2);
+               }
        }
        
        return next;
@@ -2065,20 +2516,28 @@ write_thread_data_block (ProfilerPerThreadData *data) {
        
        if (start == end)
                return;
-       
+#if (DEBUG_FILE_WRITES)
+       printf ("write_thread_data_block: preparing buffer for thread %llu\n", (unsigned long long) data->thread_id); /* conversion and cast agree on every ABI */
+#endif
        write_clock_data ();
        write_uint64 (data->thread_id);
        
        write_uint64 (data->start_event_counter);
        
+       /* Make sure that stack sections can be fully reconstructed even reading only one block */
+       rewrite_last_written_stack (&(data->stack));
+       
        while (start < end) {
-               start = write_event (start);
+               start = write_event (start, data);
        }
        WRITE_BYTE (0);
        data->first_unwritten_event = end;
        
        write_clock_data ();
        write_current_block (MONO_PROFILER_FILE_BLOCK_KIND_EVENTS);
+#if (DEBUG_FILE_WRITES)
+       printf ("write_thread_data_block: buffer for thread %llu written\n", (unsigned long long) data->thread_id); /* same pairing as the message above */
+#endif
 }
 
 static ProfilerExecutableMemoryRegionData*
@@ -2092,6 +2551,7 @@ profiler_executable_memory_region_new (gpointer *start, gpointer *end, guint32 f
        result->is_new = TRUE;
        
        result->file = NULL;
+       result->file_region_reference = NULL;
        result->symbols_capacity = id;
        result->symbols_count = id;
        result->symbols = NULL;
@@ -2104,14 +2564,17 @@ executable_file_close (ProfilerExecutableMemoryRegionData *region);
 
 static void
 profiler_executable_memory_region_destroy (ProfilerExecutableMemoryRegionData *data) {
-       if (data->file_name != NULL) {
-               g_free (data->file_name);
+       if (data->file != NULL) {
+               executable_file_close (data); /* runs first: the close path still reads the region's file_name */
+               data->file = NULL;
        }
        if (data->symbols != NULL) {
                g_free (data->symbols);
+               data->symbols = NULL;
        }
-       if (data->file != NULL) {
-               executable_file_close (data);
+       if (data->file_name != NULL) {
+               g_free (data->file_name); /* released only after executable_file_close has run */
+               data->file_name = NULL;
        }
        g_free (data);
 }
@@ -2195,19 +2658,35 @@ append_region (ProfilerExecutableMemoryRegions *regions, gpointer *start, gpoint
        regions->next_id ++;
 }
 
+/* TRUE when both regions cover the same address range at the same file offset under the same file name. */
+static gboolean
+regions_are_equivalent (ProfilerExecutableMemoryRegionData *region1, ProfilerExecutableMemoryRegionData *region2) {
+       if ((region1->start != region2->start) || (region1->end != region2->end)) {
+               return FALSE;
+       }
+       if (region1->file_offset != region2->file_offset) {
+               return FALSE;
+       }
+       return (strcmp (region1->file_name, region2->file_name) == 0) ? TRUE : FALSE;
+}
+
+static int
+compare_regions (const void *a1, const void *a2) {
+       ProfilerExecutableMemoryRegionData *left = * (ProfilerExecutableMemoryRegionData**) a1; /* qsort passes the addresses of two array slots */
+       ProfilerExecutableMemoryRegionData *right = * (ProfilerExecutableMemoryRegionData**) a2;
+       return (left->start == right->start) ? 0 : ((left->start < right->start) ? -1 : 1); /* ascending start address */
+}
+
 static void
 restore_old_regions (ProfilerExecutableMemoryRegions *old_regions, ProfilerExecutableMemoryRegions *new_regions) {
        int old_i;
        int new_i;
        
-       for (old_i = 0; old_i < old_regions->regions_count; old_i++) {
-               ProfilerExecutableMemoryRegionData *old_region = old_regions->regions [old_i];
-               for (new_i = 0; new_i < new_regions->regions_count; new_i++) {
-                       ProfilerExecutableMemoryRegionData *new_region = new_regions->regions [new_i];
-                       if ((old_region->start == new_region->start) &&
-                                       (old_region->end == new_region->end) &&
-                                       (old_region->file_offset == new_region->file_offset) &&
-                                       ! strcmp (old_region->file_name, new_region->file_name)) {
+       for (new_i = 0; new_i < new_regions->regions_count; new_i++) {
+               ProfilerExecutableMemoryRegionData *new_region = new_regions->regions [new_i];
+               for (old_i = 0; old_i < old_regions->regions_count; old_i++) {
+                       ProfilerExecutableMemoryRegionData *old_region = old_regions->regions [old_i];
+                       if ( regions_are_equivalent (old_region, new_region)) {
                                new_regions->regions [new_i] = old_region;
                                old_regions->regions [old_i] = new_region;
                                
@@ -2218,16 +2697,49 @@ restore_old_regions (ProfilerExecutableMemoryRegions *old_regions, ProfilerExecu
        }
 }
 
-static int
-compare_regions (const void *a1, const void *a2) {
-       ProfilerExecutableMemoryRegionData *r1 = * (ProfilerExecutableMemoryRegionData**) a1;
-       ProfilerExecutableMemoryRegionData *r2 = * (ProfilerExecutableMemoryRegionData**) a2;
-       return (r1->start < r2->start)? -1 : ((r1->start > r2->start)? 1 : 0);
+/* Sort the regions by start address, then collapse equivalent neighbours
+ * into a single entry, preferring the one that is not flagged is_new. */
+static void
+sort_regions (ProfilerExecutableMemoryRegions *regions) {
+       int position = 1;
+       
+       if (regions->regions_count <= 1) {
+               return;
+       }
+       qsort (regions->regions, regions->regions_count, sizeof (ProfilerExecutableMemoryRegionData *), compare_regions);
+       while (position < regions->regions_count) {
+               ProfilerExecutableMemoryRegionData *kept = regions->regions [position - 1];
+               ProfilerExecutableMemoryRegionData *candidate = regions->regions [position];
+               int tail;
+               
+               if (! regions_are_equivalent (kept, candidate)) {
+                       position ++;
+                       continue;
+               }
+               /* Duplicate pair: exactly one survives, in slot position - 1. */
+               if (candidate->is_new) {
+                       profiler_executable_memory_region_destroy (candidate);
+               } else {
+                       profiler_executable_memory_region_destroy (kept);
+                       regions->regions [position - 1] = candidate;
+               }
+               /* Shift the remaining entries down over slot position. */
+               for (tail = position + 1; tail < regions->regions_count; tail++) {
+                       regions->regions [tail - 1] = regions->regions [tail];
+               }
+               regions->regions_count --;
+       }
 }
 
 static void
-sort_regions (ProfilerExecutableMemoryRegions *regions) {
-       qsort (regions->regions, regions->regions_count, sizeof (ProfilerExecutableMemoryRegionData *), compare_regions);
+fix_region_references (ProfilerExecutableMemoryRegions *regions) {
+       int i;
+       for (i = 0; i < regions->regions_count; i++) {
+               ProfilerExecutableMemoryRegionData *region = regions->regions [i];
+               if (region->file_region_reference != NULL) {
+                       region->file_region_reference->region = region;
+               }
+       }
 }
 
 static void
@@ -2244,6 +2756,7 @@ executable_file_add_region_reference (ProfilerExecutableFile *file, ProfilerExec
                        section_region->region = region;
                        section_region->section_address = (gpointer) section_header->sh_addr;
                        section_region->section_offset = section_header->sh_offset;
+                       region->file_region_reference = section_region;
                }
        }
 }
@@ -2251,143 +2764,147 @@ executable_file_add_region_reference (ProfilerExecutableFile *file, ProfilerExec
 static ProfilerExecutableFile*
 executable_file_open (ProfilerExecutableMemoryRegionData *region) {
        ProfilerExecutableFiles *files = & (profiler->executable_files);
-       ProfilerExecutableFile *file = (ProfilerExecutableFile*) g_hash_table_lookup (files->table, region->file_name);
+       ProfilerExecutableFile *file = region->file; /* a region that already has a file attached reuses it */
+       
        if (file == NULL) {
-               guint16 test = 0x0102;
-               struct stat stat_buffer;
-               int symtab_index = 0;
-               int strtab_index = 0;
-               int dynsym_index = 0;
-               int dynstr_index = 0;
-               ElfHeader *header;
-               guint8 *section_headers;
-               int section_index;
-               int strings_index;
+               file = (ProfilerExecutableFile*) g_hash_table_lookup (files->table, region->file_name); /* otherwise look the name up among the files already in the table */
                
-               file = g_new0 (ProfilerExecutableFile, 1);
-               region->file = file;
-               file->reference_count ++;
-               
-               file->fd = open (region->file_name, O_RDONLY);
-               if (file->fd == -1) {
-                       //g_warning ("Cannot open file '%s': '%s'", region->file_name, strerror (errno));
-                       return file;
-               } else {
-                       if (fstat (file->fd, &stat_buffer) != 0) {
-                               //g_warning ("Cannot stat file '%s': '%s'", region->file_name, strerror (errno));
+               if (file == NULL) {
+                       guint16 test = 0x0102;
+                       struct stat stat_buffer;
+                       int symtab_index = 0;
+                       int strtab_index = 0;
+                       int dynsym_index = 0;
+                       int dynstr_index = 0;
+                       ElfHeader *header;
+                       guint8 *section_headers;
+                       int section_index;
+                       int strings_index;
+                       
+                       file = g_new0 (ProfilerExecutableFile, 1);
+                       region->file = file;
+                       g_hash_table_insert (files->table, region->file_name, file); /* registered before any parsing, so the early returns below leave the entry in the table */
+                       file->reference_count ++;
+                       file->next_new_file = files->new_files; /* push the file on the head of the new_files list */
+                       files->new_files = file;
+                       
+                       file->fd = open (region->file_name, O_RDONLY);
+                       if (file->fd == -1) {
+                               //g_warning ("Cannot open file '%s': '%s'", region->file_name, strerror (errno));
                                return file;
                        } else {
-                               size_t region_length = ((guint8*)region->end) - ((guint8*)region->start);
-                               file->length = stat_buffer.st_size;
-                               
-                               if (file->length == region_length) {
-                                       file->data = region->start;
-                                       close (file->fd);
-                                       file->fd = -1;
+                               if (fstat (file->fd, &stat_buffer) != 0) {
+                                       //g_warning ("Cannot stat file '%s': '%s'", region->file_name, strerror (errno));
+                                       return file;
                                } else {
-                                       file->data = mmap (NULL, file->length, PROT_READ, MAP_PRIVATE, file->fd, 0);
+                                       size_t region_length = ((guint8*)region->end) - ((guint8*)region->start);
+                                       file->length = stat_buffer.st_size;
                                        
-                                       if (file->data == MAP_FAILED) {
+                                       if (file->length == region_length) {
+                                               file->data = region->start; /* lengths match: the region's own memory is used as the file contents */
                                                close (file->fd);
-                                               //g_warning ("Cannot map file '%s': '%s'", region->file_name, strerror (errno));
-                                               file->data = NULL;
-                                               return file;
+                                               file->fd = -1;
+                                       } else {
+                                               file->data = mmap (NULL, file->length, PROT_READ, MAP_PRIVATE, file->fd, 0);
+                                               
+                                               if (file->data == MAP_FAILED) {
+                                                       close (file->fd);
+                                                       //g_warning ("Cannot map file '%s': '%s'", region->file_name, strerror (errno));
+                                                       file->data = NULL;
+                                                       return file;
+                                               }
                                        }
                                }
                        }
-               }
-               
-               header = (ElfHeader*) file->data;
-               
-               if ((header->e_ident [EI_MAG0] != 0x7f) || (header->e_ident [EI_MAG1] != 'E') ||
-                               (header->e_ident [EI_MAG2] != 'L') || (header->e_ident [EI_MAG3] != 'F')) {
-                       return file;
-               }
-               
-               if (sizeof (gsize) == 4) {
-                       if (header->e_ident [EI_CLASS] != ELF_CLASS_32) {
-                               g_warning ("Class is not ELF_CLASS_32 with gsize size %d", (int) sizeof (gsize));
-                               return file;
-                       }
-               } else if (sizeof (gsize) == 8) {
-                       if (header->e_ident [EI_CLASS] != ELF_CLASS_64) {
-                               g_warning ("Class is not ELF_CLASS_64 with gsize size %d", (int) sizeof (gsize));
+                       
+                       header = (ElfHeader*) file->data;
+                       
+                       if ((header->e_ident [EI_MAG0] != 0x7f) || (header->e_ident [EI_MAG1] != 'E') ||
+                                       (header->e_ident [EI_MAG2] != 'L') || (header->e_ident [EI_MAG3] != 'F')) {
                                return file;
                        }
-               } else {
-                       g_warning ("Absurd gsize size %d", (int) sizeof (gsize));
-                       return file;
-               }
-               
-               if ((*(guint8*)(&test)) == 0x01) {
-                       if (header->e_ident [EI_DATA] != ELF_DATA_MSB) {
-                               g_warning ("Data is not ELF_DATA_MSB with first test byte 0x01");
+                       
+                       if (sizeof (gsize) == 4) {
+                               if (header->e_ident [EI_CLASS] != ELF_CLASS_32) {
+                                       g_warning ("Class is not ELF_CLASS_32 with gsize size %d", (int) sizeof (gsize));
+                                       return file;
+                               }
+                       } else if (sizeof (gsize) == 8) {
+                               if (header->e_ident [EI_CLASS] != ELF_CLASS_64) {
+                                       g_warning ("Class is not ELF_CLASS_64 with gsize size %d", (int) sizeof (gsize));
+                                       return file;
+                               }
+                       } else {
+                               g_warning ("Absurd gsize size %d", (int) sizeof (gsize));
                                return file;
                        }
-               } else if ((*(guint8*)(&test)) == 0x02) {
-                       if (header->e_ident [EI_DATA] != ELF_DATA_LSB) {
-                               g_warning ("Data is not ELF_DATA_LSB with first test byte 0x02");
+                       
+                       if ((*(guint8*)(&test)) == 0x01) {
+                               if (header->e_ident [EI_DATA] != ELF_DATA_MSB) {
+                                       g_warning ("Data is not ELF_DATA_MSB with first test byte 0x01");
+                                       return file;
+                               }
+                       } else if ((*(guint8*)(&test)) == 0x02) {
+                               if (header->e_ident [EI_DATA] != ELF_DATA_LSB) {
+                                       g_warning ("Data is not ELF_DATA_LSB with first test byte 0x02");
+                                       return file;
+                               }
+                       } else {
+                               g_warning ("Absurd test byte value");
                                return file;
                        }
-               } else {
-                       g_warning ("Absurd test byte value");
-                       return file;
-               }
-               
-               /* OK, this is a usable elf file... */
-               file->header = header;
-               section_headers = file->data + header->e_shoff;
-               file->main_string_table = ((const char*) file->data) + (((ElfSection*) (section_headers + (header->e_shentsize * header->e_shstrndx)))->sh_offset);
-               
-               for (section_index = 0; section_index < header->e_shnum; section_index ++) {
-                       ElfSection *section_header = (ElfSection*) (section_headers + (header->e_shentsize * section_index));
                        
-                       if (section_header->sh_type == ELF_SHT_SYMTAB) {
-                               symtab_index = section_index;
-                       } else if (section_header->sh_type == ELF_SHT_DYNSYM) {
-                               dynsym_index = section_index;
-                       } else if (section_header->sh_type == ELF_SHT_STRTAB) {
-                               if (! strcmp (file->main_string_table + section_header->sh_name, ".strtab")) {
-                                       strtab_index = section_index;
-                               } else if (! strcmp (file->main_string_table + section_header->sh_name, ".dynstr")) {
-                                       dynstr_index = section_index;
+                       /* OK, this is a usable elf file... */
+                       file->header = header;
+                       section_headers = file->data + header->e_shoff;
+                       file->main_string_table = ((const char*) file->data) + (((ElfSection*) (section_headers + (header->e_shentsize * header->e_shstrndx)))->sh_offset);
+                       
+                       for (section_index = 0; section_index < header->e_shnum; section_index ++) {
+                               ElfSection *section_header = (ElfSection*) (section_headers + (header->e_shentsize * section_index));
+                               
+                               if (section_header->sh_type == ELF_SHT_SYMTAB) {
+                                       symtab_index = section_index;
+                               } else if (section_header->sh_type == ELF_SHT_DYNSYM) {
+                                       dynsym_index = section_index;
+                               } else if (section_header->sh_type == ELF_SHT_STRTAB) {
+                                       if (! strcmp (file->main_string_table + section_header->sh_name, ".strtab")) {
+                                               strtab_index = section_index;
+                                       } else if (! strcmp (file->main_string_table + section_header->sh_name, ".dynstr")) {
+                                               dynstr_index = section_index;
+                                       }
                                }
                        }
-               }
-               
-               if ((symtab_index != 0) && (strtab_index != 0)) {
-                       section_index = symtab_index;
-                       strings_index = strtab_index;
-               } else if ((dynsym_index != 0) && (dynstr_index != 0)) {
-                       section_index = dynsym_index;
-                       strings_index = dynstr_index;
+                       
+                       if ((symtab_index != 0) && (strtab_index != 0)) {
+                               section_index = symtab_index;
+                               strings_index = strtab_index;
+                       } else if ((dynsym_index != 0) && (dynstr_index != 0)) {
+                               section_index = dynsym_index;
+                               strings_index = dynstr_index;
+                       } else {
+                               section_index = 0;
+                               strings_index = 0;
+                       }
+                       
+                       if (section_index != 0) {
+                               ElfSection *section_header = (ElfSection*) (section_headers + (header->e_shentsize * section_index));
+                               file->symbol_size = section_header->sh_entsize;
+                               file->symbols_count = (guint32) (section_header->sh_size / section_header->sh_entsize);
+                               file->symbols_start = file->data + section_header->sh_offset;
+                               file->symbols_string_table = ((const char*) file->data) + (((ElfSection*) (section_headers + (header->e_shentsize * strings_index)))->sh_offset);
+                       }
+                       
+                       file->section_regions = g_new0 (ProfilerExecutableFileSectionRegion, file->header->e_shnum); /* one zeroed slot per ELF section header */
                } else {
-                       section_index = 0;
-                       strings_index = 0;
-               }
-               
-               if (section_index != 0) {
-                       ElfSection *section_header = (ElfSection*) (section_headers + (header->e_shentsize * section_index));
-                       file->symbol_size = section_header->sh_entsize;
-                       file->symbols_count = (guint32) (section_header->sh_size / section_header->sh_entsize);
-                       file->symbols_start = file->data + section_header->sh_offset;
-                       file->symbols_string_table = ((const char*) file->data) + (((ElfSection*) (section_headers + (header->e_shentsize * strings_index)))->sh_offset);
+                       region->file = file; /* found in the table: attach it to this region and count the extra reference */
+                       file->reference_count ++;
                }
-               
-               file->section_regions = g_new0 (ProfilerExecutableFileSectionRegion, file->header->e_shnum);
-       } else {
-               region->file = file;
-               file->reference_count ++;
        }
        
        if (file->header != NULL) {
                executable_file_add_region_reference (file, region);
        }
        
-       if (file->next_new_file == NULL) {
-               file->next_new_file = files->new_files;
-               files->new_files = file;
-       }
        return file;
 }
 
@@ -2405,6 +2922,7 @@ executable_file_free (ProfilerExecutableFile* file) {
        }
        if (file->section_regions != NULL) {
                g_free (file->section_regions);
+               file->section_regions = NULL;
        }
        g_free (file);
 }
@@ -2413,6 +2931,12 @@ static void
 executable_file_close (ProfilerExecutableMemoryRegionData *region) {
        region->file->reference_count --;
        
+       if ((region->file_region_reference != NULL) && (region->file_region_reference->region == region)) {
+               region->file_region_reference->region = NULL;
+               region->file_region_reference->section_address = 0;
+               region->file_region_reference->section_offset = 0;
+       }
+       
        if (region->file->reference_count <= 0) {
                ProfilerExecutableFiles *files = & (profiler->executable_files);
                g_hash_table_remove (files->table, region->file_name);
@@ -2573,6 +3097,7 @@ executable_memory_region_find_symbol (ProfilerExecutableMemoryRegionData *region
 
 //FIXME: make also Win32 and BSD variants
 #define MAPS_BUFFER_SIZE 4096
+#define MAPS_FILENAME_SIZE 2048
 
 static gboolean
 update_regions_buffer (int fd, char *buffer) {
@@ -2651,13 +3176,12 @@ const char *map_line_parser_state [] = {
 };
 
 static char*
-parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer, char *current) {
+parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer, char *filename, char *current) {
        MapLineParserState state = MAP_LINE_PARSER_STATE_START_ADDRESS;
        gsize start_address = 0;
        gsize end_address = 0;
        guint32 offset = 0;
-       char *start_filename = NULL;
-       char *end_filename = NULL;
+       int filename_index = 0;
        gboolean is_executable = FALSE;
        gboolean done = FALSE;
        
@@ -2721,22 +3245,32 @@ parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer,
                case MAP_LINE_PARSER_STATE_BLANK_BEFORE_FILENAME:
                        if ((c == '/') || (c == '[')) {
                                state = MAP_LINE_PARSER_STATE_FILENAME;
-                               start_filename = current;
+                               filename [filename_index] = *current;
+                               filename_index ++;
                        } else if (! isblank (c)) {
                                state = MAP_LINE_PARSER_STATE_INVALID;
                        }
                        break;
                case MAP_LINE_PARSER_STATE_FILENAME:
                        if (c == '\n') {
                                state = MAP_LINE_PARSER_STATE_DONE;
                                done = TRUE;
-                               end_filename = current;
+                               filename [filename_index] = 0;
+                       } else if (filename_index < (MAPS_FILENAME_SIZE - 1)) {
+                               /* Copy the character, always keeping one byte free for the terminator */
+                               filename [filename_index] = *current;
+                               filename_index ++;
+                       } else {
+                               /* The name does not fit: warn once and reject the line like any other malformed one */
+                               filename [filename_index] = 0;
+                               g_warning ("ELF filename too long: \"%s\"...\n", filename);
+                               state = MAP_LINE_PARSER_STATE_INVALID;
                        }
                        break;
                case MAP_LINE_PARSER_STATE_DONE:
                        if (done && is_executable) {
-                               *end_filename = 0;
-                               append_region (regions, (gpointer) start_address, (gpointer) end_address, offset, start_filename);
+                               filename [filename_index] = 0;
+                               append_region (regions, (gpointer) start_address, (gpointer) end_address, offset, filename);
                        }
                        return current;
                case MAP_LINE_PARSER_STATE_INVALID:
@@ -2760,6 +3293,7 @@ parse_map_line (ProfilerExecutableMemoryRegions *regions, int fd, char *buffer,
 static gboolean
 scan_process_regions (ProfilerExecutableMemoryRegions *regions) {
        char *buffer;
+       char *filename;
        char *current;
        int fd;
        
@@ -2769,13 +3303,15 @@ scan_process_regions (ProfilerExecutableMemoryRegions *regions) {
        }
        
        buffer = malloc (MAPS_BUFFER_SIZE);
+       filename = malloc (MAPS_FILENAME_SIZE);
        update_regions_buffer (fd, buffer);
        current = buffer;
        while (current != NULL) {
-               current = parse_map_line (regions, fd, buffer, current);
+               current = parse_map_line (regions, fd, buffer, filename, current);
        }
        
        free (buffer);
+       free (filename);
        
        close (fd);
        return TRUE;
@@ -2800,8 +3336,9 @@ refresh_memory_regions (void) {
        
        LOG_WRITER_THREAD ("Refreshing memory regions...");
        scan_process_regions (new_regions);
-       restore_old_regions (old_regions, new_regions);
        sort_regions (new_regions);
+       restore_old_regions (old_regions, new_regions);
+       fix_region_references (new_regions);
        LOG_WRITER_THREAD ("Refreshed memory regions.");
        
        LOG_WRITER_THREAD ("Building symbol tables...");
@@ -2874,7 +3411,7 @@ refresh_memory_regions (void) {
 
 static gboolean
 write_statistical_hit (MonoDomain *domain, gpointer address, gboolean regions_refreshed) {
-       MonoJitInfo *ji = mono_jit_info_table_find (mono_domain_get (), (char*) address);
+       MonoJitInfo *ji = (domain != NULL) ? mono_jit_info_table_find (domain, (char*) address) : NULL;
        
        if (ji != NULL) {
                MonoMethod *method = mono_jit_info_get_method (ji);
@@ -2949,11 +3486,11 @@ flush_all_mappings (void);
 
 static void
 write_statistical_data_block (ProfilerStatisticalData *data) {
+       MonoThread *current_thread = mono_thread_current ();
        int start_index = data->first_unwritten_index;
        int end_index = data->next_free_index;
        gboolean regions_refreshed = FALSE;
        int call_chain_depth = profiler->statistical_call_chain_depth;
-       MonoDomain *domain = mono_domain_get ();
        int index;
        
        if (end_index > data->end_index)
@@ -2972,15 +3509,15 @@ write_statistical_data_block (ProfilerStatisticalData *data) {
        
        for (index = start_index; index < end_index; index ++) {
                int base_index = index * (call_chain_depth + 1);
-               gpointer address = data->addresses [base_index];
+               ProfilerStatisticalHit hit = data->hits [base_index];
                int callers_count;
                
-               regions_refreshed = write_statistical_hit (domain, address, regions_refreshed);
+               regions_refreshed = write_statistical_hit ((current_thread != NULL) ? hit.domain : NULL, hit.address, regions_refreshed);
                base_index ++;
                
                for (callers_count = 0; callers_count < call_chain_depth; callers_count ++) {
-                       address = data->addresses [base_index + callers_count];
-                       if (address == NULL) {
+                       hit = data->hits [base_index + callers_count];
+                       if (hit.address == NULL) {
                                break;
                        }
                }
@@ -2989,9 +3526,9 @@ write_statistical_data_block (ProfilerStatisticalData *data) {
                        write_uint32 ((callers_count << 3) | MONO_PROFILER_STATISTICAL_CODE_CALL_CHAIN);
                        
                        for (callers_count = 0; callers_count < call_chain_depth; callers_count ++) {
-                               address = data->addresses [base_index + callers_count];
-                               if (address != NULL) {
-                                       regions_refreshed = write_statistical_hit (domain, address, regions_refreshed);
+                               hit = data->hits [base_index + callers_count];
+                               if (hit.address != NULL) {
+                                       regions_refreshed = write_statistical_hit ((current_thread != NULL) ? hit.domain : NULL, hit.address, regions_refreshed);
                                } else {
                                        break;
                                }
@@ -3049,7 +3586,9 @@ update_mapping (ProfilerPerThreadData *data) {
                        MethodIdMappingElement *element = method_id_mapping_element_get (start->data.address);
                        if (element == NULL) {
                                MonoMethod *method = start->data.address;
-                               method_id_mapping_element_new (method);
+                               if (method != NULL) {
+                                       method_id_mapping_element_new (method);
+                               }
                        }
                }
                
@@ -3082,7 +3621,7 @@ flush_full_event_data_buffer (ProfilerPerThreadData *data) {
        // We flush all mappings because some id definitions could come
        // from other threads
        flush_all_mappings ();
-       g_assert (data->first_unmapped_event >= data->end_event);
+       g_assert (data->first_unmapped_event >= data->next_free_event);
        
        write_thread_data_block (data);
        
@@ -3095,13 +3634,15 @@ flush_full_event_data_buffer (ProfilerPerThreadData *data) {
        UNLOCK_PROFILER ();
 }
 
-#define GET_NEXT_FREE_EVENT(d,e) {\
-       if ((d)->next_free_event >= (d)->end_event) {\
+/* Reserve (count) event slots in (d), flushing first if they would not fit, and store the first one in (e); the ">=" is intentional, to leave one spare slot for "extended values" */
+#define RESERVE_EVENTS(d,e,count) do {\
+       if ((d)->next_free_event >= ((d)->end_event - (count))) {\
                flush_full_event_data_buffer (d);\
        }\
        (e) = (d)->next_free_event;\
-       (d)->next_free_event ++;\
+       (d)->next_free_event += (count);\
 } while (0)
+#define GET_NEXT_FREE_EVENT(d,e) RESERVE_EVENTS ((d),(e),1)
 
 static void
 flush_everything (void) {
@@ -3114,6 +3655,21 @@ flush_everything (void) {
        write_statistical_data_block (profiler->statistical_data);
 }
 
+/* Assumes the profiler lock is held. If CHECK_WRITER_THREAD () holds: sets the writer_thread_flush_everything flag, raises the writer event, then waits via WRITER_EVENT_DONE_WAIT (); otherwise it only logs and flushes nothing. */
+static void
+writer_thread_flush_everything (void) {
+       if (CHECK_WRITER_THREAD ()) {
+               profiler->writer_thread_flush_everything = TRUE;
+               LOG_WRITER_THREAD ("writer_thread_flush_everything: raising event...");
+               WRITER_EVENT_RAISE ();
+               LOG_WRITER_THREAD ("writer_thread_flush_everything: waiting event...");
+               WRITER_EVENT_DONE_WAIT ();
+               LOG_WRITER_THREAD ("writer_thread_flush_everything: got event.");
+       } else {
+               LOG_WRITER_THREAD ("writer_thread_flush_everything: no thread.");
+       }
+}
+
 #define RESULT_TO_LOAD_CODE(r) (((r)==MONO_PROFILE_OK)?MONO_PROFILER_LOADED_EVENT_SUCCESS:MONO_PROFILER_LOADED_EVENT_FAILURE)
 static void
 appdomain_start_load (MonoProfiler *profiler, MonoDomain *domain) {
@@ -3138,7 +3694,7 @@ static void
 appdomain_start_unload (MonoProfiler *profiler, MonoDomain *domain) {
        LOCK_PROFILER ();
        loaded_element_unload_start (profiler->loaded_appdomains, domain);
-       flush_everything ();
+       writer_thread_flush_everything ();
        UNLOCK_PROFILER ();
 }
 
@@ -3165,8 +3721,11 @@ module_end_load (MonoProfiler *profiler, MonoImage *module, int result) {
        MonoAssemblyName aname;
        LoadedElement *element;
        
-       mono_assembly_fill_assembly_name (module, &aname);
-       name = mono_stringify_assembly_name (&aname);
+       if (mono_assembly_fill_assembly_name (module, &aname)) {
+               name = mono_stringify_assembly_name (&aname);
+       } else {
+               name = g_strdup_printf ("Dynamic module \"%p\"", module);
+       }
        LOCK_PROFILER ();
        element = loaded_element_load_end (profiler->loaded_modules, module, name);
        write_element_load_block (element, MONO_PROFILER_LOADED_EVENT_MODULE | RESULT_TO_LOAD_CODE (result), CURRENT_THREAD_ID ());
@@ -3177,7 +3736,7 @@ static void
 module_start_unload (MonoProfiler *profiler, MonoImage *module) {
        LOCK_PROFILER ();
        loaded_element_unload_start (profiler->loaded_modules, module);
-       flush_everything ();
+       writer_thread_flush_everything ();
        UNLOCK_PROFILER ();
 }
 
@@ -3204,8 +3763,11 @@ assembly_end_load (MonoProfiler *profiler, MonoAssembly *assembly, int result) {
        MonoAssemblyName aname;
        LoadedElement *element;
        
-       mono_assembly_fill_assembly_name (mono_assembly_get_image (assembly), &aname);
-       name = mono_stringify_assembly_name (&aname);
+       if (mono_assembly_fill_assembly_name (mono_assembly_get_image (assembly), &aname)) {
+               name = mono_stringify_assembly_name (&aname);
+       } else {
+               name = g_strdup_printf ("Dynamic assembly \"%p\"", assembly);
+       }
        LOCK_PROFILER ();
        element = loaded_element_load_end (profiler->loaded_assemblies, assembly, name);
        write_element_load_block (element, MONO_PROFILER_LOADED_EVENT_ASSEMBLY | RESULT_TO_LOAD_CODE (result), CURRENT_THREAD_ID ());
@@ -3216,7 +3778,7 @@ static void
 assembly_start_unload (MonoProfiler *profiler, MonoAssembly *assembly) {
        LOCK_PROFILER ();
        loaded_element_unload_start (profiler->loaded_assemblies, assembly);
-       flush_everything ();
+       writer_thread_flush_everything ();
        UNLOCK_PROFILER ();
 }
 static void
@@ -3241,11 +3803,14 @@ class_event_code_to_string (MonoProfilerClassEvents code) {
        }
 }
 static const char*
-method_event_code_to_string (MonoProfilerClassEvents code) {
+method_event_code_to_string (MonoProfilerMethodEvents code) {
        switch (code) {
        case MONO_PROFILER_EVENT_METHOD_CALL: return "CALL";
        case MONO_PROFILER_EVENT_METHOD_JIT: return "JIT";
        case MONO_PROFILER_EVENT_METHOD_FREED: return "FREED";
+       case MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER: return "ALLOCATION_CALLER";
+       case MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER: return "ALLOCATION_JIT_TIME_CALLER";
+       case MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID: return "ALLOCATION_OBJECT_ID";
        default: g_assert_not_reached (); return "";
        }
 }
@@ -3259,6 +3824,9 @@ number_event_code_to_string (MonoProfilerEvents code) {
        case MONO_PROFILER_EVENT_GC_RESIZE: return "GC_RESIZE";
        case MONO_PROFILER_EVENT_GC_STOP_WORLD: return "GC_STOP_WORLD";
        case MONO_PROFILER_EVENT_GC_START_WORLD: return "GC_START_WORLD";
+       case MONO_PROFILER_EVENT_JIT_TIME_ALLOCATION: return "JIT_TIME_ALLOCATION";
+       case MONO_PROFILER_EVENT_STACK_SECTION: return "STACK_SECTION";
+       case MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID: return "ALLOCATION_OBJECT_ID";
        default: g_assert_not_reached (); return "";
        }
 }
@@ -3279,12 +3847,12 @@ event_kind_to_string (MonoProfilerEventKind code) {
        }
 }
 static void
-print_event_data (gsize thread_id, ProfilerEventData *event, guint64 value) {
+print_event_data (ProfilerPerThreadData *data, ProfilerEventData *event, guint64 value) {
        if (event->data_type == MONO_PROFILER_EVENT_DATA_TYPE_CLASS) {
-               printf ("[TID %ld] CLASS[%p] event [%p] %s:%s:%s[%d-%d-%d] %ld (%s.%s)\n",
-                               thread_id,
+               printf ("STORE EVENT [TID %ld][EVENT %ld] CLASS[%p] %s:%s:%s[%d-%d-%d] %ld (%s.%s)\n",
+                               data->thread_id,
+                               event - data->events,
                                event->data.address,
-                               event,
                                class_event_code_to_string (event->code & ~MONO_PROFILER_EVENT_RESULT_MASK),
                                event_result_to_string (event->code & MONO_PROFILER_EVENT_RESULT_MASK),
                                event_kind_to_string (event->kind),
@@ -3295,10 +3863,10 @@ print_event_data (gsize thread_id, ProfilerEventData *event, guint64 value) {
                                mono_class_get_namespace ((MonoClass*) event->data.address),
                                mono_class_get_name ((MonoClass*) event->data.address));
        } else if (event->data_type == MONO_PROFILER_EVENT_DATA_TYPE_METHOD) {
-               printf ("[TID %ld] METHOD[%p] event [%p] %s:%s:%s[%d-%d-%d] %ld (%s.%s:%s (?))\n",
-                               thread_id,
+               printf ("STORE EVENT [TID %ld][EVENT %ld]  METHOD[%p] %s:%s:%s[%d-%d-%d] %ld (%s.%s:%s (?))\n",
+                               data->thread_id,
+                               event - data->events,
                                event->data.address,
-                               event,
                                method_event_code_to_string (event->code & ~MONO_PROFILER_EVENT_RESULT_MASK),
                                event_result_to_string (event->code & MONO_PROFILER_EVENT_RESULT_MASK),
                                event_kind_to_string (event->kind),
@@ -3306,14 +3874,14 @@ print_event_data (gsize thread_id, ProfilerEventData *event, guint64 value) {
                                event->kind,
                                event->code,
                                value,
-                               mono_class_get_namespace (mono_method_get_class ((MonoMethod*) event->data.address)),
-                               mono_class_get_name (mono_method_get_class ((MonoMethod*) event->data.address)),
-                               mono_method_get_name ((MonoMethod*) event->data.address));
+                               (event->data.address != NULL) ? mono_class_get_namespace (mono_method_get_class ((MonoMethod*) event->data.address)) : "<NULL>",
+                               (event->data.address != NULL) ? mono_class_get_name (mono_method_get_class ((MonoMethod*) event->data.address)) : "<NULL>",
+                               (event->data.address != NULL) ? mono_method_get_name ((MonoMethod*) event->data.address) : "<NULL>");
        } else {
-               printf ("[TID %ld] NUMBER[%ld] event [%p] %s:%s[%d-%d-%d] %ld\n",
-                               thread_id,
+               printf ("STORE EVENT [TID %ld][EVENT %ld]  NUMBER[%ld] %s:%s[%d-%d-%d] %ld\n",
+                               data->thread_id,
+                               event - data->events,
                                (guint64) event->data.number,
-                               event,
                                number_event_code_to_string (event->code),
                                event_kind_to_string (event->kind),
                                event->data_type,
@@ -3322,129 +3890,134 @@ print_event_data (gsize thread_id, ProfilerEventData *event, guint64 value) {
                                value);
        }
 }
-#define LOG_EVENT(tid,ev,val) print_event_data ((tid),(ev),(val))
+#define LOG_EVENT(data,ev,val) print_event_data ((data),(ev),(val))
 #else
-#define LOG_EVENT(tid,ev,val)
+#define LOG_EVENT(data,ev,val)
 #endif
 
 #define RESULT_TO_EVENT_CODE(r) (((r)==MONO_PROFILE_OK)?MONO_PROFILER_EVENT_RESULT_SUCCESS:MONO_PROFILER_EVENT_RESULT_FAILURE)
 
-#define STORE_EVENT_ITEM_COUNTER(p,i,dt,c,k) do {\
-       ProfilerPerThreadData *data;\
-       ProfilerEventData *event;\
+#define STORE_EVENT_ITEM_COUNTER(event,p,i,dt,c,k) do {\
        guint64 counter;\
        guint64 delta;\
-       GET_PROFILER_THREAD_DATA (data);\
-       GET_NEXT_FREE_EVENT (data, event);\
        MONO_PROFILER_GET_CURRENT_COUNTER (counter);\
-       event->data.address = (i);\
-       event->data_type = (dt);\
-       event->code = (c);\
-       event->kind = (k);\
+       (event)->data.address = (i);\
+       (event)->data_type = (dt);\
+       (event)->code = (c);\
+       (event)->kind = (k);\
        delta = counter - data->last_event_counter;\
        if (delta < MAX_EVENT_VALUE) {\
-               event->value = delta;\
+               (event)->value = delta;\
        } else {\
                ProfilerEventData *extension = data->next_free_event;\
                data->next_free_event ++;\
-               event->value = MAX_EVENT_VALUE;\
+               (event)->value = MAX_EVENT_VALUE;\
                *(guint64*)extension = delta;\
        }\
        data->last_event_counter = counter;\
-       LOG_EVENT (data->thread_id, event, delta);\
+       LOG_EVENT (data, (event), delta);\
 } while (0);
-#define STORE_EVENT_ITEM_VALUE(p,i,dt,c,k,v) do {\
-       ProfilerPerThreadData *data;\
-       ProfilerEventData *event;\
-       GET_PROFILER_THREAD_DATA (data);\
-       GET_NEXT_FREE_EVENT (data, event);\
-       event->data.address = (i);\
-       event->data_type = (dt);\
-       event->code = (c);\
-       event->kind = (k);\
+#define STORE_EVENT_ITEM_VALUE(event,p,i,dt,c,k,v) do {\
+       (event)->data.address = (i);\
+       (event)->data_type = (dt);\
+       (event)->code = (c);\
+       (event)->kind = (k);\
        if ((v) < MAX_EVENT_VALUE) {\
-               event->value = (v);\
+               (event)->value = (v);\
        } else {\
                ProfilerEventData *extension = data->next_free_event;\
                data->next_free_event ++;\
-               event->value = MAX_EVENT_VALUE;\
+               (event)->value = MAX_EVENT_VALUE;\
                *(guint64*)extension = (v);\
        }\
-       LOG_EVENT (data->thread_id, event, (v));\
+       LOG_EVENT (data, (event), (v));\
 }while (0);
-#define STORE_EVENT_NUMBER_COUNTER(p,n,dt,c,k) do {\
-       ProfilerPerThreadData *data;\
-       ProfilerEventData *event;\
+#define STORE_EVENT_NUMBER_COUNTER(event,p,n,dt,c,k) do {\
        guint64 counter;\
        guint64 delta;\
-       GET_PROFILER_THREAD_DATA (data);\
-       GET_NEXT_FREE_EVENT (data, event);\
        MONO_PROFILER_GET_CURRENT_COUNTER (counter);\
-       event->data.number = (n);\
-       event->data_type = (dt);\
-       event->code = (c);\
-       event->kind = (k);\
+       (event)->data.number = (n);\
+       (event)->data_type = (dt);\
+       (event)->code = (c);\
+       (event)->kind = (k);\
        delta = counter - data->last_event_counter;\
        if (delta < MAX_EVENT_VALUE) {\
-               event->value = delta;\
+               (event)->value = delta;\
        } else {\
                ProfilerEventData *extension = data->next_free_event;\
                data->next_free_event ++;\
-               event->value = MAX_EVENT_VALUE;\
+               (event)->value = MAX_EVENT_VALUE;\
                *(guint64*)extension = delta;\
        }\
        data->last_event_counter = counter;\
-       LOG_EVENT (data->thread_id, event, delta);\
+       LOG_EVENT (data, (event), delta);\
 }while (0);
-#define STORE_EVENT_NUMBER_VALUE(p,n,dt,c,k,v) do {\
-       ProfilerPerThreadData *data;\
-       ProfilerEventData *event;\
-       GET_PROFILER_THREAD_DATA (data);\
-       GET_NEXT_FREE_EVENT (data, event);\
-       event->data.number = (n);\
-       event->data_type = (dt);\
-       event->code = (c);\
-       event->kind = (k);\
+#define STORE_EVENT_NUMBER_VALUE(event,p,n,dt,c,k,v) do {\
+       (event)->data.number = (n);\
+       (event)->data_type = (dt);\
+       (event)->code = (c);\
+       (event)->kind = (k);\
        if ((v) < MAX_EVENT_VALUE) {\
-               event->value = (v);\
+               (event)->value = (v);\
        } else {\
                ProfilerEventData *extension = data->next_free_event;\
                data->next_free_event ++;\
-               event->value = MAX_EVENT_VALUE;\
+               (event)->value = MAX_EVENT_VALUE;\
                *(guint64*)extension = (v);\
        }\
-       LOG_EVENT (data->thread_id, event, (v));\
+       LOG_EVENT (data, (event), (v));\
 }while (0);
 
-
 static void
 class_start_load (MonoProfiler *profiler, MonoClass *klass) {
-       STORE_EVENT_ITEM_COUNTER (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_LOAD, MONO_PROFILER_EVENT_KIND_START);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_LOAD, MONO_PROFILER_EVENT_KIND_START);
 }
 static void
 class_end_load (MonoProfiler *profiler, MonoClass *klass, int result) {
-       STORE_EVENT_ITEM_COUNTER (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_LOAD | RESULT_TO_EVENT_CODE (result), MONO_PROFILER_EVENT_KIND_END);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_LOAD | RESULT_TO_EVENT_CODE (result), MONO_PROFILER_EVENT_KIND_END);
 }
 static void
 class_start_unload (MonoProfiler *profiler, MonoClass *klass) {
-       STORE_EVENT_ITEM_COUNTER (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_UNLOAD, MONO_PROFILER_EVENT_KIND_START);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_UNLOAD, MONO_PROFILER_EVENT_KIND_START);
 }
 static void
 class_end_unload (MonoProfiler *profiler, MonoClass *klass) {
-       STORE_EVENT_ITEM_COUNTER (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_UNLOAD, MONO_PROFILER_EVENT_KIND_END);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_UNLOAD, MONO_PROFILER_EVENT_KIND_END);
 }
 
 static void
 method_start_jit (MonoProfiler *profiler, MonoMethod *method) {
-       if (profiler->action_flags.jit_time) {
-               STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_JIT, MONO_PROFILER_EVENT_KIND_START);
-       }
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       thread_stack_push_jitted_safely (&(data->stack), method, TRUE);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_JIT, MONO_PROFILER_EVENT_KIND_START);
 }
 static void
 method_end_jit (MonoProfiler *profiler, MonoMethod *method, int result) {
-       if (profiler->action_flags.jit_time) {
-               STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_JIT | RESULT_TO_EVENT_CODE (result), MONO_PROFILER_EVENT_KIND_END);
-       }
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_JIT | RESULT_TO_EVENT_CODE (result), MONO_PROFILER_EVENT_KIND_END);
+       thread_stack_pop (&(data->stack));
 }
 
 #if (HAS_OPROFILE)
@@ -3470,60 +4043,157 @@ method_jit_result (MonoProfiler *prof, MonoMethod *method, MonoJitInfo* jinfo, i
 
 static void
 method_enter (MonoProfiler *profiler, MonoMethod *method) {
-       STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_CALL, MONO_PROFILER_EVENT_KIND_START);
+       ProfilerPerThreadData *data;
+       
+       CHECK_PROFILER_ENABLED ();
+       GET_PROFILER_THREAD_DATA (data);
+       if (profiler->action_flags.track_calls) {
+               ProfilerEventData *event;
+               GET_NEXT_FREE_EVENT (data, event);
+               STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_CALL, MONO_PROFILER_EVENT_KIND_START);
+       }
+       if (profiler->action_flags.track_stack) {
+               thread_stack_push_safely (&(data->stack), method);
+       }
 }
 static void
 method_leave (MonoProfiler *profiler, MonoMethod *method) {
-       STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_CALL, MONO_PROFILER_EVENT_KIND_END);
+       ProfilerPerThreadData *data;
+       
+       CHECK_PROFILER_ENABLED ();
+       GET_PROFILER_THREAD_DATA (data);
+       if (profiler->action_flags.track_calls) {
+               ProfilerEventData *event;
+               GET_NEXT_FREE_EVENT (data, event);
+               STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_CALL, MONO_PROFILER_EVENT_KIND_END);
+       }
+       if (profiler->action_flags.track_stack) {
+               thread_stack_pop (&(data->stack));
+       }
 }
 
 static void
 method_free (MonoProfiler *profiler, MonoMethod *method) {
-       STORE_EVENT_ITEM_COUNTER (profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_FREED, 0);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_ITEM_COUNTER (event, profiler, method, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_FREED, 0);
 }
 
 static void
 thread_start (MonoProfiler *profiler, gsize tid) {
-       STORE_EVENT_NUMBER_COUNTER (profiler, tid, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_THREAD, MONO_PROFILER_EVENT_KIND_START);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_NUMBER_COUNTER (event, profiler, tid, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_THREAD, MONO_PROFILER_EVENT_KIND_START);
 }
 static void
 thread_end (MonoProfiler *profiler, gsize tid) {
-       STORE_EVENT_NUMBER_COUNTER (profiler, tid, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_THREAD, MONO_PROFILER_EVENT_KIND_END);
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_NUMBER_COUNTER (event, profiler, tid, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_THREAD, MONO_PROFILER_EVENT_KIND_END);
 }
 
 static void
 object_allocated (MonoProfiler *profiler, MonoObject *obj, MonoClass *klass) {
-       ProfilerPerThreadData *thread_data;
+       ProfilerPerThreadData *data;
+       ProfilerEventData *events;
+       int unsaved_frames;
+       int event_slot_count;
+       
+       GET_PROFILER_THREAD_DATA (data);
+       event_slot_count = 1;
+       if (profiler->action_flags.save_allocation_caller) {
+               event_slot_count ++;
+       }
+       if (profiler->action_flags.allocations_carry_id) {
+               event_slot_count ++;
+       }
+       if (profiler->action_flags.save_allocation_stack) {
+               unsaved_frames = thread_stack_count_unsaved_frames (&(data->stack));
+               event_slot_count += (unsaved_frames + 1);
+       } else {
+               unsaved_frames = 0;
+       }
+       RESERVE_EVENTS (data, events, event_slot_count);
        
-       STORE_EVENT_ITEM_VALUE (profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_ALLOCATION, 0, (guint64) mono_object_get_size (obj));
-       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot) {
-               GET_PROFILER_THREAD_DATA (thread_data);
-               STORE_ALLOCATED_OBJECT (thread_data, obj);
+       if (profiler->action_flags.save_allocation_stack) {
+               int i;
+               
+               STORE_EVENT_NUMBER_VALUE (events, profiler, data->stack.last_saved_top, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_STACK_SECTION, 0, unsaved_frames);
+               events++;
+               for (i = 0; i < unsaved_frames; i++) {
+                       if (! thread_stack_index_from_top_is_jitted (&(data->stack), i)) {
+                               STORE_EVENT_ITEM_VALUE (events, profiler, thread_stack_index_from_top (&(data->stack), i), MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER, 0, 0);
+                       } else {
+                               STORE_EVENT_ITEM_VALUE (events, profiler, thread_stack_index_from_top (&(data->stack), i), MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER, 0, 0);
+                       }
+                       events ++;
+               }
+               
+               data->stack.last_saved_top = data->stack.top;
+       }
+       
+       STORE_EVENT_ITEM_VALUE (events, profiler, klass, MONO_PROFILER_EVENT_DATA_TYPE_CLASS, MONO_PROFILER_EVENT_CLASS_ALLOCATION, 0, (guint64) mono_object_get_size (obj));
+       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot || profiler->action_flags.collection_summary) {
+               STORE_ALLOCATED_OBJECT (data, obj);
+       }
+       
+       if (profiler->action_flags.save_allocation_caller) {
+               MonoMethod *caller = thread_stack_top (&(data->stack));
+               gboolean caller_is_jitted = thread_stack_top_is_jitted (&(data->stack));
+               int index = 1;
+               events ++;
+               
+               while ((caller != NULL) && (caller->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE)) {
+                       caller = thread_stack_index_from_top (&(data->stack), index);
+                       caller_is_jitted = thread_stack_index_from_top_is_jitted (&(data->stack), index);
+                       index ++;
+               }
+               if (! caller_is_jitted) {
+                       STORE_EVENT_ITEM_VALUE (events, profiler, caller, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_ALLOCATION_CALLER, 0, 0);
+               } else {
+                       STORE_EVENT_ITEM_VALUE (events, profiler, caller, MONO_PROFILER_EVENT_DATA_TYPE_METHOD, MONO_PROFILER_EVENT_METHOD_ALLOCATION_JIT_TIME_CALLER, 0, 0);
+               }
+       }
+       if (profiler->action_flags.allocations_carry_id) {
+               events ++;
+               STORE_EVENT_ITEM_VALUE (events, profiler, obj, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_ALLOCATION_OBJECT_ID, 0, 0);
        }
 }
 
 static void
 statistical_call_chain (MonoProfiler *profiler, int call_chain_depth, guchar **ips, void *context) {
+       MonoDomain *domain = mono_domain_get ();
        ProfilerStatisticalData *data;
-       int index;
+       unsigned int index;
        
+       CHECK_PROFILER_ENABLED ();
        do {
                data = profiler->statistical_data;
-               index = InterlockedIncrement (&data->next_free_index);
+               index = InterlockedIncrement ((int*) &data->next_free_index);
                
                if (index <= data->end_index) {
-                       int base_index = (index - 1) * (profiler->statistical_call_chain_depth + 1);
-                       int call_chain_index = 0;
+                       unsigned int base_index = (index - 1) * (profiler->statistical_call_chain_depth + 1);
+                       unsigned int call_chain_index = 0;
                        
                        //printf ("[statistical_call_chain] (%d)\n", call_chain_depth);
                        while (call_chain_index < call_chain_depth) {
+                               ProfilerStatisticalHit *hit = & (data->hits [base_index + call_chain_index]);
                                //printf ("[statistical_call_chain] [%d] = %p\n", base_index + call_chain_index, ips [call_chain_index]);
-                               data->addresses [base_index + call_chain_index] = (gpointer) ips [call_chain_index];
+                               hit->address = (gpointer) ips [call_chain_index];
+                               hit->domain = domain;
                                call_chain_index ++;
                        }
                        while (call_chain_index <= profiler->statistical_call_chain_depth) {
+                               ProfilerStatisticalHit *hit = & (data->hits [base_index + call_chain_index]);
                                //printf ("[statistical_call_chain] [%d] = NULL\n", base_index + call_chain_index);
-                               data->addresses [base_index + call_chain_index] = NULL;
+                               hit->address = NULL;
+                               hit->domain = NULL;
                                call_chain_index ++;
                        }
                } else {
@@ -3544,9 +4214,12 @@ statistical_call_chain (MonoProfiler *profiler, int call_chain_depth, guchar **i
                                profiler->statistical_data = new_data;
                                profiler->statistical_data_second_buffer = NULL;
                                WRITER_EVENT_RAISE ();
+                               /* Otherwise exit from the handler and drop the event... */
+                       } else {
+                               break;
                        }
                        
-                       /* Loop again, hoping to acquire a free slot this time */
+                       /* Loop again, hoping to acquire a free slot this time (otherwise the event will be dropped) */
                        data = NULL;
                }
        } while (data == NULL);
@@ -3554,15 +4227,19 @@ statistical_call_chain (MonoProfiler *profiler, int call_chain_depth, guchar **i
 
 static void
 statistical_hit (MonoProfiler *profiler, guchar *ip, void *context) {
+       MonoDomain *domain = mono_domain_get ();
        ProfilerStatisticalData *data;
-       int index;
+       unsigned int index;
        
+       CHECK_PROFILER_ENABLED ();
        do {
                data = profiler->statistical_data;
-               index = InterlockedIncrement (&data->next_free_index);
+               index = InterlockedIncrement ((int*) &data->next_free_index);
                
                if (index <= data->end_index) {
-                       data->addresses [index - 1] = (gpointer) ip;
+                       ProfilerStatisticalHit *hit = & (data->hits [index - 1]);
+                       hit->address = (gpointer) ip;
+                       hit->domain = domain;
                } else {
                        /* Check if we are the one that must swap the buffers */
                        if (index == data->end_index + 1) {
@@ -3576,7 +4253,7 @@ statistical_hit (MonoProfiler *profiler, guchar *ip, void *context) {
                                        /* Then, wait that it produced the free buffer */
                                        new_data = profiler->statistical_data_second_buffer;
                                } while (new_data == NULL);
-
+                               
                                profiler->statistical_data_ready = data;
                                profiler->statistical_data = new_data;
                                profiler->statistical_data_second_buffer = NULL;
@@ -4016,7 +4693,9 @@ handle_heap_profiling (MonoProfiler *profiler, MonoGCEvent ev) {
 
 static void
 gc_event (MonoProfiler *profiler, MonoGCEvent ev, int generation) {
-       gboolean do_heap_profiling = profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot;
+       ProfilerPerThreadData *data;
+       ProfilerEventData *event;
+       gboolean do_heap_profiling = profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot || profiler->action_flags.collection_summary;
        guint32 event_value;
        
        if (ev == MONO_GC_EVENT_START) {
@@ -4028,7 +4707,11 @@ gc_event (MonoProfiler *profiler, MonoGCEvent ev, int generation) {
        if (do_heap_profiling && (ev == MONO_GC_EVENT_POST_STOP_WORLD)) {
                handle_heap_profiling (profiler, ev);
        }
-       STORE_EVENT_NUMBER_COUNTER (profiler, event_value, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, gc_event_code_from_profiler_event (ev), gc_event_kind_from_profiler_event (ev));
+       
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
+       STORE_EVENT_NUMBER_COUNTER (event, profiler, event_value, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, gc_event_code_from_profiler_event (ev), gc_event_kind_from_profiler_event (ev));
+       
        if (do_heap_profiling && (ev != MONO_GC_EVENT_POST_STOP_WORLD)) {
                handle_heap_profiling (profiler, ev);
        }
@@ -4036,8 +4719,25 @@ gc_event (MonoProfiler *profiler, MonoGCEvent ev, int generation) {
 
 static void
 gc_resize (MonoProfiler *profiler, gint64 new_size) {
+       ProfilerPerThreadData *data; /* GC resize callback: logs new_size as an event; data is passed to GET_PROFILER_THREAD_DATA and GET_NEXT_FREE_EVENT below */
+       ProfilerEventData *event; /* passed to GET_NEXT_FREE_EVENT and STORE_EVENT_NUMBER_VALUE below */
+       GET_PROFILER_THREAD_DATA (data);
+       GET_NEXT_FREE_EVENT (data, event);
        profiler->garbage_collection_counter ++;
-       STORE_EVENT_NUMBER_VALUE (profiler, new_size, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_GC_RESIZE, 0, profiler->garbage_collection_counter);
+       STORE_EVENT_NUMBER_VALUE (event, profiler, new_size, MONO_PROFILER_EVENT_DATA_TYPE_OTHER, MONO_PROFILER_EVENT_GC_RESIZE, 0, profiler->garbage_collection_counter); /* stores new_size with code MONO_PROFILER_EVENT_GC_RESIZE and the counter incremented just above */
+}
+
+static void
+runtime_initialized (MonoProfiler *profiler) { /* Wakes the writer thread, waits for its "done" event, then registers the Mono.Profiler.RuntimeControls internal calls. */
+       LOG_WRITER_THREAD ("runtime_initialized: waking writer thread to enable it...\n");
+       WRITER_EVENT_ENABLE_RAISE (); /* data_writer_thread blocks in WRITER_EVENT_ENABLE_WAIT until this is raised */
+       LOG_WRITER_THREAD ("runtime_initialized: waiting writer thread...\n");
+       WRITER_EVENT_DONE_WAIT (); /* data_writer_thread raises the matching WRITER_EVENT_DONE_RAISE after its attach step */
+       LOG_WRITER_THREAD ("runtime_initialized: writer thread enabled.\n");
+       mono_add_internal_call ("Mono.Profiler.RuntimeControls::EnableProfiler", enable_profiler);
+       mono_add_internal_call ("Mono.Profiler.RuntimeControls::DisableProfiler", disable_profiler);
+       mono_add_internal_call ("Mono.Profiler.RuntimeControls::TakeHeapSnapshot", request_heap_snapshot);
+       LOG_WRITER_THREAD ("runtime_initialized: initialized internal calls.\n");
 }
 
 /* called at the end of the program */
@@ -4045,6 +4745,7 @@ static void
 profiler_shutdown (MonoProfiler *prof)
 {
        ProfilerPerThreadData* current_thread_data;
+       ProfilerPerThreadData* next_thread_data;
        
        LOG_WRITER_THREAD ("profiler_shutdown: zeroing relevant flags");
        mono_profiler_set_events (0);
@@ -4061,17 +4762,14 @@ profiler_shutdown (MonoProfiler *prof)
        WRITER_EVENT_DESTROY ();
        
        LOCK_PROFILER ();
-       
+       flush_everything ();
        MONO_PROFILER_GET_CURRENT_TIME (profiler->end_time);
        MONO_PROFILER_GET_CURRENT_COUNTER (profiler->end_counter);
-       
-       mono_thread_attach (mono_get_root_domain ());
-       
-       flush_everything ();
        write_end_block ();
        FLUSH_FILE ();
        CLOSE_FILE();
        UNLOCK_PROFILER ();
+       
        g_free (profiler->file_name);
        if (profiler->file_name_suffix != NULL) {
                g_free (profiler->file_name_suffix);
@@ -4085,7 +4783,8 @@ profiler_shutdown (MonoProfiler *prof)
        
        FREE_PROFILER_THREAD_DATA ();
        
-       for (current_thread_data = profiler->per_thread_data; current_thread_data != NULL; current_thread_data = current_thread_data->next) {
+       for (current_thread_data = profiler->per_thread_data; current_thread_data != NULL; current_thread_data = next_thread_data) {
+               next_thread_data = current_thread_data->next;
                profiler_per_thread_data_destroy (current_thread_data);
        }
        if (profiler->statistical_data != NULL) {
@@ -4121,12 +4820,53 @@ profiler_shutdown (MonoProfiler *prof)
        profiler = NULL;
 }
 
+#ifndef PLATFORM_WIN32
+static int
+parse_signal_name (const char *signal_name) {
+       /* The three symbolic names are matched ignoring case... */
+       if (strcasecmp (signal_name, "SIGUSR1") == 0) {
+               return SIGUSR1;
+       }
+       if (strcasecmp (signal_name, "SIGUSR2") == 0) {
+               return SIGUSR2;
+       }
+       /* ...and any other text is converted with atoi (). */
+       return (strcasecmp (signal_name, "SIGPROF") == 0) ? SIGPROF : atoi (signal_name);
+}
+static gboolean
+check_signal_number (int signal_number) {
+       /* SIGPROF is accepted only when MONO_PROFILE_STATISTICAL is not set in the
+        * profiler flags. */
+       if (signal_number == SIGPROF) {
+               return (profiler->flags & MONO_PROFILE_STATISTICAL) ? FALSE : TRUE;
+       }
+       /* Apart from that, only SIGUSR1 and SIGUSR2 pass the check. */
+       return ((signal_number == SIGUSR1) || (signal_number == SIGUSR2)) ? TRUE : FALSE;
+}
+#endif
+
+#define FAIL_ARGUMENT_CHECK(message) do {\
+       failure_message = (message);\
+       goto failure_handling;\
+} while (0) /* Stores the message in failure_message and jumps to the failure_handling label; both must be in scope at the point of use. */
+#define FAIL_PARSING_VALUED_ARGUMENT FAIL_ARGUMENT_CHECK("cannot parse valued argument %s") /* failure for an unrecognised "name=value" argument */
+#define FAIL_PARSING_FLAG_ARGUMENT FAIL_ARGUMENT_CHECK("cannot parse flag argument %s") /* failure for an unrecognised flag argument */
+#define CHECK_CONDITION(condition,message) do {\
+       gboolean result = (condition);\
+       if (result) {\
+               FAIL_ARGUMENT_CHECK (message);\
+       }\
+} while (0) /* Fails with message when condition is true (note: true means failure). */
+#define FAIL_IF_HAS_MINUS CHECK_CONDITION(has_minus,"minus ('-') modifier not allowed for argument %s") /* reads the local has_minus */
+#define TRUE_IF_NOT_MINUS ((!has_minus)?TRUE:FALSE) /* TRUE unless the local has_minus is set */
+
 #define DEFAULT_ARGUMENTS "s"
 static void
 setup_user_options (const char *arguments) {
        gchar **arguments_array, **current_argument;
 #ifndef PLATFORM_WIN32
        int gc_request_signal_number = 0;
+       int toggle_signal_number = 0;
 #endif
        detect_fast_timer ();
        
@@ -4144,7 +4884,9 @@ setup_user_options (const char *arguments) {
                        MONO_PROFILE_ASSEMBLY_EVENTS|
                        MONO_PROFILE_MODULE_EVENTS|
                        MONO_PROFILE_CLASS_EVENTS|
-                       MONO_PROFILE_METHOD_EVENTS;
+                       MONO_PROFILE_METHOD_EVENTS|
+                       MONO_PROFILE_JIT_COMPILATION;
+       profiler->profiler_enabled = TRUE;
        
        if (arguments == NULL) {
                arguments = DEFAULT_ARGUMENTS;
@@ -4160,124 +4902,228 @@ setup_user_options (const char *arguments) {
        for (current_argument = arguments_array; ((current_argument != NULL) && (current_argument [0] != 0)); current_argument ++) {
                char *argument = *current_argument;
                char *equals = strstr (argument, "=");
+               const char *failure_message = NULL;
+               gboolean has_plus;
+               gboolean has_minus;
+               
+               if (*argument == '+') {
+                       has_plus = TRUE;
+                       has_minus = FALSE;
+                       argument ++;
+               } else if (*argument == '-') {
+                       has_plus = FALSE;
+                       has_minus = TRUE;
+                       argument ++;
+               } else {
+                       has_plus = FALSE;
+                       has_minus = FALSE;
+               }
                
                if (equals != NULL) {
                        int equals_position = equals - argument;
                        
                        if (! (strncmp (argument, "per-thread-buffer-size", equals_position) && strncmp (argument, "tbs", equals_position))) {
                                int value = atoi (equals + 1);
+                               FAIL_IF_HAS_MINUS;
                                if (value > 0) {
                                        profiler->per_thread_buffer_size = value;
                                }
                        } else if (! (strncmp (argument, "statistical", equals_position) && strncmp (argument, "stat", equals_position) && strncmp (argument, "s", equals_position))) {
                                int value = atoi (equals + 1);
+                               FAIL_IF_HAS_MINUS;
                                if (value > 0) {
                                        if (value > 16) {
                                                value = 16;
                                        }
                                        profiler->statistical_call_chain_depth = value;
-                                       profiler->flags |= MONO_PROFILE_STATISTICAL|MONO_PROFILE_JIT_COMPILATION;
-                                       profiler->action_flags.jit_time = TRUE;
+                                       profiler->flags |= MONO_PROFILE_STATISTICAL;
                                }
                        } else if (! (strncmp (argument, "statistical-thread-buffer-size", equals_position) && strncmp (argument, "sbs", equals_position))) {
                                int value = atoi (equals + 1);
+                               FAIL_IF_HAS_MINUS;
                                if (value > 0) {
                                        profiler->statistical_buffer_size = value;
                                }
                        } else if (! (strncmp (argument, "write-buffer-size", equals_position) && strncmp (argument, "wbs", equals_position))) {
                                int value = atoi (equals + 1);
+                               FAIL_IF_HAS_MINUS;
                                if (value > 0) {
                                        profiler->write_buffer_size = value;
                                }
                        } else if (! (strncmp (argument, "output", equals_position) && strncmp (argument, "out", equals_position) && strncmp (argument, "o", equals_position) && strncmp (argument, "O", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
                                if (strlen (equals + 1) > 0) {
                                        profiler->file_name = g_strdup (equals + 1);
                                }
                        } else if (! (strncmp (argument, "output-suffix", equals_position) && strncmp (argument, "suffix", equals_position) && strncmp (argument, "os", equals_position) && strncmp (argument, "OS", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
                                if (strlen (equals + 1) > 0) {
                                        profiler->file_name_suffix = g_strdup (equals + 1);
                                }
+                       } else if (! (strncmp (argument, "heap-shot", equals_position) && strncmp (argument, "heap", equals_position) && strncmp (argument, "h", equals_position))) {
+                               char *parameter = equals + 1;
+                               FAIL_IF_HAS_MINUS; /* reject the '-' modifier before any profiler state is modified */
+                               if (! strcmp (parameter, "all")) {
+                                       profiler->dump_next_heap_snapshots = -1;
+                               } else {
+                                       gc_request_signal_number = parse_signal_name (parameter); /* NOTE(review): both names are declared only under #ifndef PLATFORM_WIN32, while this branch is not guarded */
+                               }
+                               if (! has_plus) {
+                                       profiler->action_flags.save_allocation_caller = TRUE;
+                                       profiler->action_flags.save_allocation_stack = TRUE;
+                                       profiler->action_flags.allocations_carry_id = TRUE_IF_NOT_MINUS;
+                               }
+                               profiler->action_flags.heap_shot = TRUE_IF_NOT_MINUS;
                        } else if (! (strncmp (argument, "gc-commands", equals_position) && strncmp (argument, "gc-c", equals_position) && strncmp (argument, "gcc", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
                                if (strlen (equals + 1) > 0) {
                                        profiler->heap_shot_command_file_name = g_strdup (equals + 1);
                                }
                        } else if (! (strncmp (argument, "gc-dumps", equals_position) && strncmp (argument, "gc-d", equals_position) && strncmp (argument, "gcd", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
                                if (strlen (equals + 1) > 0) {
                                        profiler->dump_next_heap_snapshots = atoi (equals + 1);
                                }
 #ifndef PLATFORM_WIN32
                        } else if (! (strncmp (argument, "gc-signal", equals_position) && strncmp (argument, "gc-s", equals_position) && strncmp (argument, "gcs", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
                                if (strlen (equals + 1) > 0) {
                                        char *signal_name = equals + 1;
-                                       if (! strcasecmp (signal_name, "SIGUSR1")) {
-                                               gc_request_signal_number = SIGUSR1;
-                                       } else if (! strcasecmp (signal_name, "SIGUSR2")) {
-                                               gc_request_signal_number = SIGUSR2;
-                                       } else if (! strcasecmp (signal_name, "SIGPROF")) {
-                                               gc_request_signal_number = SIGPROF;
-                                       } else {
-                                               gc_request_signal_number = atoi (signal_name);
-                                       }
+                                       gc_request_signal_number = parse_signal_name (signal_name);
+                               }
+                       } else if (! (strncmp (argument, "toggle-signal", equals_position) && strncmp (argument, "ts", equals_position))) {
+                               FAIL_IF_HAS_MINUS;
+                               if (strlen (equals + 1) > 0) {
+                                       char *signal_name = equals + 1;
+                                       toggle_signal_number = parse_signal_name (signal_name);
                                }
 #endif
                        } else {
-                               g_warning ("Cannot parse valued argument %s\n", argument);
+                               FAIL_PARSING_VALUED_ARGUMENT;
                        }
                } else {
                        if (! (strcmp (argument, "jit") && strcmp (argument, "j"))) {
-                               profiler->flags |= MONO_PROFILE_JIT_COMPILATION;
-                               profiler->action_flags.jit_time = TRUE;
+                               profiler->action_flags.jit_time = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "allocations") && strcmp (argument, "alloc") && strcmp (argument, "a"))) {
-                               profiler->flags |= MONO_PROFILE_ALLOCATIONS|MONO_PROFILE_GC;
+                               FAIL_IF_HAS_MINUS;
+                               if (! has_plus) {
+                                       profiler->action_flags.save_allocation_caller = TRUE;
+                                       profiler->action_flags.save_allocation_stack = TRUE;
+                               }
+                               if (! has_minus) {
+                                       profiler->flags |= MONO_PROFILE_ALLOCATIONS;
+                               } else {
+                                       profiler->flags &= ~MONO_PROFILE_ALLOCATIONS;
+                               }
                        } else if (! (strcmp (argument, "gc") && strcmp (argument, "g"))) {
+                               FAIL_IF_HAS_MINUS;
                                profiler->flags |= MONO_PROFILE_GC;
                        } else if (! (strcmp (argument, "allocations-summary") && strcmp (argument, "as"))) {
-                               profiler->flags |= MONO_PROFILE_ALLOCATIONS|MONO_PROFILE_GC;
-                               profiler->action_flags.collection_summary = TRUE;
+                               profiler->action_flags.collection_summary = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "heap-shot") && strcmp (argument, "heap") && strcmp (argument, "h"))) {
-                               profiler->flags |= MONO_PROFILE_ALLOCATIONS|MONO_PROFILE_GC;
-                               profiler->action_flags.unreachable_objects = TRUE;
-                               profiler->action_flags.heap_shot = TRUE;
+                               FAIL_IF_HAS_MINUS;
+                               if (! has_plus) {
+                                       profiler->action_flags.save_allocation_caller = TRUE;
+                                       profiler->action_flags.save_allocation_stack = TRUE;
+                                       profiler->action_flags.allocations_carry_id = TRUE_IF_NOT_MINUS;
+                               }
+                               profiler->action_flags.heap_shot = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "unreachable") && strcmp (argument, "free") && strcmp (argument, "f"))) {
-                               profiler->flags |= MONO_PROFILE_ALLOCATIONS|MONO_PROFILE_GC;
-                               profiler->action_flags.unreachable_objects = TRUE;
+                               profiler->action_flags.unreachable_objects = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "threads") && strcmp (argument, "t"))) {
-                               profiler->flags |= MONO_PROFILE_THREADS;
+                               if (! has_minus) {
+                                       profiler->flags |= MONO_PROFILE_THREADS;
+                               } else {
+                                       profiler->flags &= ~MONO_PROFILE_THREADS;
+                               }
                        } else if (! (strcmp (argument, "enter-leave") && strcmp (argument, "calls") && strcmp (argument, "c"))) {
-                               profiler->flags |= MONO_PROFILE_ENTER_LEAVE;
+                               profiler->action_flags.track_calls = TRUE_IF_NOT_MINUS;
                        } else if (! (strcmp (argument, "statistical") && strcmp (argument, "stat") && strcmp (argument, "s"))) {
-                               profiler->flags |= MONO_PROFILE_STATISTICAL|MONO_PROFILE_JIT_COMPILATION;
-                               profiler->action_flags.jit_time = TRUE;
+                               if (! has_minus) {
+                                       profiler->flags |= MONO_PROFILE_STATISTICAL;
+                               } else {
+                                       profiler->flags &= ~MONO_PROFILE_STATISTICAL;
+                               }
+                       } else if (! (strcmp (argument, "save-allocation-caller") && strcmp (argument, "sac"))) {
+                               profiler->action_flags.save_allocation_caller = TRUE_IF_NOT_MINUS;
+                       } else if (! (strcmp (argument, "save-allocation-stack") && strcmp (argument, "sas"))) {
+                               profiler->action_flags.save_allocation_stack = TRUE_IF_NOT_MINUS;
+                       } else if (! (strcmp (argument, "allocations-carry-id") && strcmp (argument, "aci"))) {
+                               profiler->action_flags.allocations_carry_id = TRUE_IF_NOT_MINUS;
+                       } else if (! (strcmp (argument, "start-enabled") && strcmp (argument, "se"))) {
+                               profiler->profiler_enabled = TRUE_IF_NOT_MINUS;
+                       } else if (! (strcmp (argument, "start-disabled") && strcmp (argument, "sd"))) {
+                               profiler->profiler_enabled = (has_minus ? TRUE : FALSE); /* opposite of start-enabled: the plain flag disables, "-sd" enables */
                        } else if (! (strcmp (argument, "force-accurate-timer") && strcmp (argument, "fac"))) {
-                               use_fast_timer = FALSE;
+                               use_fast_timer = TRUE_IF_NOT_MINUS;
 #if (HAS_OPROFILE)
                        } else if (! (strcmp (argument, "oprofile") && strcmp (argument, "oprof"))) {
                                profiler->flags |= MONO_PROFILE_JIT_COMPILATION;
                                profiler->action_flags.oprofile = TRUE;
                                if (op_open_agent ()) {
-                                       g_warning ("Problem calling op_open_agent\n");
+                                       FAIL_ARGUMENT_CHECK ("problem calling op_open_agent");
                                }
 #endif
                        } else if (strcmp (argument, "logging")) {
-                               g_warning ("Cannot parse flag argument %s\n", argument);
+                               FAIL_PARSING_FLAG_ARGUMENT;
                        }
                }
+               
+failure_handling:
+               if (failure_message != NULL) {
+                       g_warning (failure_message, argument);
+                       failure_message = NULL;
+               }
        }
        
        g_free (arguments_array);
        
 #ifndef PLATFORM_WIN32
        if (gc_request_signal_number != 0) {
-               if (((gc_request_signal_number == SIGPROF) && ! (profiler->flags & MONO_PROFILE_STATISTICAL)) ||
-                               (gc_request_signal_number == SIGUSR1) ||
-                               (gc_request_signal_number == SIGUSR2)) {
+               if (check_signal_number (gc_request_signal_number) && (gc_request_signal_number != toggle_signal_number)) {
                        add_gc_request_handler (gc_request_signal_number);
                } else {
                        g_error ("Cannot use signal %d", gc_request_signal_number);
                }
        }
+       if (toggle_signal_number != 0) {
+               if (check_signal_number (toggle_signal_number) && (toggle_signal_number != gc_request_signal_number)) {
+                       add_toggle_handler (toggle_signal_number);
+               } else {
+                       g_error ("Cannot use signal %d", toggle_signal_number); /* report the rejected toggle signal, not the GC request one */
+               }
+       }
 #endif
        
+       /* Ensure that the profiler flags needed to support required action flags are active */
+       if (profiler->action_flags.jit_time) {
+               profiler->flags |= MONO_PROFILE_JIT_COMPILATION;
+       }
+       if (profiler->action_flags.save_allocation_caller || profiler->action_flags.save_allocation_stack || profiler->action_flags.allocations_carry_id) {
+               profiler->flags |= MONO_PROFILE_ALLOCATIONS;
+       }
+       if (profiler->action_flags.collection_summary || profiler->action_flags.heap_shot || profiler->action_flags.unreachable_objects) {
+               profiler->flags |= MONO_PROFILE_ALLOCATIONS;
+       }
+       if (profiler->action_flags.track_calls) {
+               profiler->flags |= MONO_PROFILE_ENTER_LEAVE;
+               profiler->action_flags.jit_time = TRUE;
+       }
+       if (profiler->action_flags.save_allocation_caller || profiler->action_flags.save_allocation_stack) {
+               profiler->action_flags.track_stack = TRUE;
+               profiler->flags |= MONO_PROFILE_ENTER_LEAVE;
+       }
+       
+       /* Without JIT events the stat profiler will not find method IDs (bit test: '&', not '|')... */
+       if (profiler->flags & MONO_PROFILE_STATISTICAL) {
+               profiler->flags |= MONO_PROFILE_JIT_COMPILATION;
+       }
+       /* Profiling allocations without knowing which gc we are doing is not nice (bit test: '&', not '|')... */
+       if (profiler->flags & MONO_PROFILE_ALLOCATIONS) {
+               profiler->flags |= MONO_PROFILE_GC;
+       }
+
+       
        if (profiler->file_name == NULL) {
                char *program_name = g_get_prgname ();
                
@@ -4337,6 +5183,27 @@ data_writer_thread (gpointer nothing) {
        static gboolean thread_detached = FALSE;
        static MonoThread *this_thread = NULL;
        
+       /* Wait for the OK to attach to the runtime */
+       WRITER_EVENT_ENABLE_WAIT ();
+       if (! profiler->terminate_writer_thread) {
+               MonoDomain * root_domain = mono_get_root_domain ();
+               if (root_domain != NULL) {
+                       LOG_WRITER_THREAD ("data_writer_thread: attaching thread");
+                       this_thread = mono_thread_attach (root_domain);
+                       mono_thread_set_manage_callback (this_thread, thread_detach_callback);
+                       thread_attached = TRUE;
+               } else {
+                       g_error ("Cannot get root domain\n");
+               }
+       } else {
+               /* Execution was too short, pretend we attached and detached. */
+               thread_attached = TRUE;
+               thread_detached = TRUE;
+       }
+       profiler->writer_thread_enabled = TRUE;
+       /* Notify that we are attached to the runtime */
+       WRITER_EVENT_DONE_RAISE ();
+       
        for (;;) {
                ProfilerStatisticalData *statistical_data;
                gboolean done;
@@ -4345,24 +5212,6 @@ data_writer_thread (gpointer nothing) {
                WRITER_EVENT_WAIT ();
                LOG_WRITER_THREAD ("data_writer_thread: just woke up");
                
-               if (! thread_attached) {
-                       if (! profiler->terminate_writer_thread) {
-                               MonoDomain * root_domain = mono_get_root_domain ();
-                               if (root_domain != NULL) {
-                                       LOG_WRITER_THREAD ("data_writer_thread: attaching thread");
-                                       this_thread = mono_thread_attach (root_domain);
-                                       mono_thread_set_manage_callback (this_thread, thread_detach_callback);
-                                       thread_attached = TRUE;
-                               } else {
-                                       g_error ("Cannot get root domain\n");
-                               }
-                       } else {
-                               /* Execution was too short, pretend we attached and detached. */
-                               thread_attached = TRUE;
-                               thread_detached = TRUE;
-                       }
-               }
-               
                if (profiler->heap_shot_was_signalled) {
                        LOG_WRITER_THREAD ("data_writer_thread: starting requested collection");
                        mono_gc_collect (mono_gc_max_generation ());
@@ -4370,35 +5219,63 @@ data_writer_thread (gpointer nothing) {
                }
                
                statistical_data = profiler->statistical_data_ready;
-               done = (statistical_data == NULL) && (profiler->heap_shot_write_jobs == NULL);
+               done = (statistical_data == NULL) && (profiler->heap_shot_write_jobs == NULL) && (profiler->writer_thread_flush_everything == FALSE);
                
-               if (!done) {
-                       LOG_WRITER_THREAD ("data_writer_thread: acquiring lock and writing data");
-                       LOCK_PROFILER ();
-                       
-                       // This makes sure that all method ids are in place
-                       LOG_WRITER_THREAD ("data_writer_thread: writing mapping...");
-                       flush_all_mappings ();
-                       LOG_WRITER_THREAD ("data_writer_thread: wrote mapping");
-                       
-                       if ((statistical_data != NULL) && ! thread_detached) {
-                               LOG_WRITER_THREAD ("data_writer_thread: writing statistical data...");
-                               profiler->statistical_data_ready = NULL;
-                               write_statistical_data_block (statistical_data);
-                               statistical_data->next_free_index = 0;
-                               statistical_data->first_unwritten_index = 0;
-                               profiler->statistical_data_second_buffer = statistical_data;
-                               LOG_WRITER_THREAD ("data_writer_thread: wrote statistical data");
+               if ((!done) && thread_attached) {
+                       if (profiler->writer_thread_flush_everything) {
+                               /* Note that this assumes the lock is held by the thread that woke us up! */
+                               if (! thread_detached) {
+                                       LOG_WRITER_THREAD ("data_writer_thread: flushing everything...");
+                                       flush_everything ();
+                                       profiler->writer_thread_flush_everything = FALSE;
+                                       WRITER_EVENT_DONE_RAISE ();
+                                       LOG_WRITER_THREAD ("data_writer_thread: flushed everything.");
+                               } else {
+                                       LOG_WRITER_THREAD ("data_writer_thread: flushing requested, but thread is detached...");
+                                       profiler->writer_thread_flush_everything = FALSE;
+                                       WRITER_EVENT_DONE_RAISE ();
+                                       LOG_WRITER_THREAD ("data_writer_thread: done event raised.");
+                               }
+                       } else {
+                               LOG_WRITER_THREAD ("data_writer_thread: acquiring lock and writing data");
+                               LOCK_PROFILER ();
+                               
+                               // This makes sure that all method ids are in place
+                               LOG_WRITER_THREAD ("data_writer_thread: writing mapping...");
+                               flush_all_mappings ();
+                               LOG_WRITER_THREAD ("data_writer_thread: wrote mapping");
+                               
+                               if ((statistical_data != NULL) && ! thread_detached) {
+                                       LOG_WRITER_THREAD ("data_writer_thread: writing statistical data...");
+                                       profiler->statistical_data_ready = NULL;
+                                       write_statistical_data_block (statistical_data);
+                                       statistical_data->next_free_index = 0;
+                                       statistical_data->first_unwritten_index = 0;
+                                       profiler->statistical_data_second_buffer = statistical_data;
+                                       LOG_WRITER_THREAD ("data_writer_thread: wrote statistical data");
+                               }
+                               
+                               profiler_process_heap_shot_write_jobs ();
+                               
+                               UNLOCK_PROFILER ();
+                               LOG_WRITER_THREAD ("data_writer_thread: wrote data and released lock");
+                       }
+               } else {
+                       if (profiler->writer_thread_flush_everything) {
+                               LOG_WRITER_THREAD ("data_writer_thread: flushing requested, but thread is not attached...");
+                               profiler->writer_thread_flush_everything = FALSE;
+                               WRITER_EVENT_DONE_RAISE ();
+                               LOG_WRITER_THREAD ("data_writer_thread: done event raised.");
                        }
-                       
-                       profiler_process_heap_shot_write_jobs ();
-                       
-                       UNLOCK_PROFILER ();
-                       LOG_WRITER_THREAD ("data_writer_thread: wrote data and released lock");
                }
                
                if (profiler->detach_writer_thread) {
                        if (this_thread != NULL) {
+                               LOG_WRITER_THREAD ("data_writer_thread: detach requested, acquiring lock and flushing data");
+                               LOCK_PROFILER ();
+                               flush_everything ();
+                               UNLOCK_PROFILER ();
+                               LOG_WRITER_THREAD ("data_writer_thread: flushed data and released lock");
                                LOG_WRITER_THREAD ("data_writer_thread: detaching thread");
                                mono_thread_detach (this_thread);
                                this_thread = NULL;
@@ -4411,6 +5288,7 @@ data_writer_thread (gpointer nothing) {
                
                if (profiler->terminate_writer_thread) {
                LOG_WRITER_THREAD ("data_writer_thread: exiting thread");
+                       CLEANUP_WRITER_THREAD ();
                        EXIT_THREAD ();
                }
        }
@@ -4426,7 +5304,7 @@ mono_profiler_startup (const char *desc)
 {
        profiler = g_new0 (MonoProfiler, 1);
        
-       setup_user_options ((desc != NULL) ? desc : "");
+       setup_user_options ((desc != NULL) ? desc : DEFAULT_ARGUMENTS);
        
        INITIALIZE_PROFILER_MUTEX ();
        MONO_PROFILER_GET_CURRENT_TIME (profiler->start_time);
@@ -4454,7 +5332,7 @@ mono_profiler_startup (const char *desc)
        profiler->executable_files.new_files = NULL; 
        
        profiler->heap_shot_write_jobs = NULL;
-       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot) {
+       if (profiler->action_flags.unreachable_objects || profiler->action_flags.heap_shot || profiler->action_flags.collection_summary) {
                profiler_heap_buffers_setup (&(profiler->heap));
        } else {
                profiler_heap_buffers_clear (&(profiler->heap));
@@ -4471,6 +5349,7 @@ mono_profiler_startup (const char *desc)
        OPEN_FILE ();
        
        write_intro_block ();
+       write_directives_block (TRUE);
        
        mono_profiler_install (profiler, profiler_shutdown);
        
@@ -4490,6 +5369,7 @@ mono_profiler_startup (const char *desc)
        mono_profiler_install_statistical (statistical_hit);
        mono_profiler_install_statistical_call_chain (statistical_call_chain, profiler->statistical_call_chain_depth);
        mono_profiler_install_gc (gc_event, gc_resize);
+       mono_profiler_install_runtime_initialized (runtime_initialized);
 #if (HAS_OPROFILE)
        mono_profiler_install_jit_end (method_jit_result);
 #endif