2009-01-03 Rodrigo Kumpera <rkumpera@novell.com>
[mono.git] / mono / metadata / threads.c
index 2e140cb9c9ce304fdf33d1e8dfa4d438c20e032f..17f56971ac329912fdc2db775c08bd5d491409d4 100644 (file)
@@ -10,9 +10,6 @@
  */
 
 #include <config.h>
-#ifdef PLATFORM_WIN32
-#define _WIN32_WINNT 0x0500
-#endif
 
 #include <glib.h>
 #include <signal.h>
 #include <mono/metadata/marshal.h>
 #include <mono/io-layer/io-layer.h>
 #include <mono/metadata/object-internals.h>
+#include <mono/metadata/mono-debug-debugger.h>
 #include <mono/utils/mono-compiler.h>
+#include <mono/utils/mono-mmap.h>
+#include <mono/utils/mono-membar.h>
+#include <mono/utils/mono-time.h>
 
-#include <mono/os/gc_wrapper.h>
+#include <mono/metadata/gc-internal.h>
 
 /*#define THREAD_DEBUG(a) do { a; } while (0)*/
 #define THREAD_DEBUG(a)
 /*#define LIBGC_DEBUG(a) do { a; } while (0)*/
 #define LIBGC_DEBUG(a)
 
+/* Provide this for systems with glib < 2.6 */
+#ifndef G_GSIZE_FORMAT
+#   if GLIB_SIZEOF_LONG == 8
+#       define G_GSIZE_FORMAT "lu"
+#   else
+#       define G_GSIZE_FORMAT "u"
+#   endif
+#endif
+
 struct StartInfo 
 {
        guint32 (*func)(void *);
@@ -61,23 +71,40 @@ typedef union {
        gdouble fval;
 } LongDoubleUnion;
  
+/*
+ * Node of a singly linked list; each node records an offset and a size.
+ * NOTE(review): StaticDataInfo keeps a `freelist` of these, so this is
+ * presumably a reusable region of static data -- confirm against the code
+ * that fills and consumes the list.
+ */
+typedef struct _MonoThreadDomainTls {
+       struct _MonoThreadDomainTls *next;
+       guint32 offset;
+       guint32 size;
+} MonoThreadDomainTls;
+
+/*
+ * Status of a static data heap (context_static_info is one instance).
+ * NOTE(review): idx/offset presumably identify the current chunk and the
+ * next free offset inside it -- confirm in mono_alloc_static_data_slot ().
+ */
 typedef struct {
        int idx;
        int offset;
+       MonoThreadDomainTls *freelist; /* head of a list of MonoThreadDomainTls nodes */
 } StaticDataInfo;
 
-/* Number of cached culture objects in the MonoThread->culture_info array */
-#define NUM_CACHED_CULTURES 4
+/*
+ * One entry of delayed_free_table: a pointer whose release had to be
+ * postponed, together with the function that will eventually free it
+ * (called as free_func (p)).
+ */
+typedef struct {
+       gpointer p;
+       MonoHazardousFreeFunc free_func;
+} DelayedFreeItem;
 
-/*
- * The "os_handle" field of the WaitHandle class.
+/* Number of cached culture objects in the MonoThread->cached_culture_info array
+ * (per-type): we use the first NUM entries for CultureInfo and the last for
+ * UICultureInfo. So the size of the array is really NUM_CACHED_CULTURES * 2.
  */
-static MonoClassField *wait_handle_os_handle_field = NULL;
+#define NUM_CACHED_CULTURES 4
+#define CULTURES_START_IDX 0
+#define UICULTURES_START_IDX NUM_CACHED_CULTURES
 
 /* Controls access to the 'threads' hash table */
+#define mono_threads_lock() EnterCriticalSection (&threads_mutex)
+#define mono_threads_unlock() LeaveCriticalSection (&threads_mutex)
 static CRITICAL_SECTION threads_mutex;
 
 /* Controls access to context static data */
+#define mono_contexts_lock() EnterCriticalSection (&contexts_mutex)
+#define mono_contexts_unlock() LeaveCriticalSection (&contexts_mutex)
 static CRITICAL_SECTION contexts_mutex;
 
 /* Holds current status of static data heap */
@@ -89,6 +116,13 @@ static StaticDataInfo context_static_info;
  */
 static MonoGHashTable *threads=NULL;
 
+/*
+ * Threads which are starting up and are not yet in the 'threads' hash.
+ * When handle_store is called for a thread, it will be removed from this hash table.
+ * Protected by mono_threads_lock ().
+ */
+static MonoGHashTable *threads_starting_up = NULL;
+
 /* The TLS key that holds the MonoObject assigned to each thread */
 static guint32 current_object_key = -1;
 
@@ -114,10 +148,10 @@ static MonoThreadStartCB mono_thread_start_cb = NULL;
 static MonoThreadAttachCB mono_thread_attach_cb = NULL;
 
 /* function called at thread cleanup */
-static MonoThreadCleanupFunc mono_thread_cleanup = NULL;
+static MonoThreadCleanupFunc mono_thread_cleanup_fn = NULL;
 
-/* function called when a new thread has been created */
-static MonoThreadCallbacks *mono_thread_callbacks = NULL;
+/* function called to notify the runtime about a pending exception on the current thread */
+static MonoThreadNotifyPendingExcFunc mono_thread_notify_pending_exc_fn = NULL;
 
 /* The default stack size for each thread */
 static guint32 default_stacksize = 0;
@@ -126,13 +160,40 @@ static guint32 default_stacksize = 0;
 static void thread_adjust_static_data (MonoThread *thread);
 static void mono_init_static_data_info (StaticDataInfo *static_data);
 static guint32 mono_alloc_static_data_slot (StaticDataInfo *static_data, guint32 size, guint32 align);
+static gboolean mono_thread_resume (MonoThread* thread);
+static void mono_thread_start (MonoThread *thread);
+static void signal_thread_state_change (MonoThread *thread);
 
 /* Spin lock for InterlockedXXX 64 bit functions */
+#define mono_interlocked_lock() EnterCriticalSection (&interlocked_mutex)
+#define mono_interlocked_unlock() LeaveCriticalSection (&interlocked_mutex)
 static CRITICAL_SECTION interlocked_mutex;
 
 /* global count of thread interruptions requested */
 static gint32 thread_interruption_requested = 0;
 
+/* Event signaled when a thread changes its background mode */
+static HANDLE background_change_event;
+
+/* The table for small ID assignment */
+static CRITICAL_SECTION small_id_mutex;
+static int small_id_table_size = 0;
+static int small_id_next = 0;
+static int highest_small_id = -1;
+static MonoThread **small_id_table = NULL;
+
+/* The hazard table */
+#define HAZARD_TABLE_MAX_SIZE  16384 /* There cannot be more threads than this number. */
+static volatile int hazard_table_size = 0;
+static MonoThreadHazardPointers * volatile hazard_table = NULL;
+
+/* The table where we keep pointers to blocks to be freed but that
+   have to wait because they're guarded by a hazard pointer. */
+static CRITICAL_SECTION delayed_free_table_mutex;
+static GArray *delayed_free_table = NULL;
+
+static gboolean shutting_down = FALSE;
+
 guint32
 mono_thread_get_tls_key (void)
 {
@@ -149,12 +210,23 @@ mono_thread_get_tls_offset (void)
 
 /* handle_store() and handle_remove() manage the array of threads that
  * still need to be waited for when the main thread exits.
+ *
+ * If handle_store() returns FALSE the thread must not be started
+ * because Mono is shutting down.
  */
-static void handle_store(MonoThread *thread)
+static gboolean handle_store(MonoThread *thread)
 {
-       EnterCriticalSection(&threads_mutex);
+       mono_threads_lock ();
+
+       THREAD_DEBUG (g_message ("%s: thread %p ID %"G_GSIZE_FORMAT, __func__, thread, (gsize)thread->tid));
+
+       if (threads_starting_up)
+               mono_g_hash_table_remove (threads_starting_up, thread);
 
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": thread %p ID %d", thread, thread->tid));
+       if (shutting_down) {
+               mono_threads_unlock ();
+               return FALSE;
+       }
 
        if(threads==NULL) {
                MONO_GC_REGISTER_ROOT (threads);
@@ -164,20 +236,41 @@ static void handle_store(MonoThread *thread)
        /* We don't need to duplicate thread->handle, because it is
         * only closed when the thread object is finalized by the GC.
         */
-       mono_g_hash_table_insert(threads, GUINT_TO_POINTER(thread->tid), thread);
-       LeaveCriticalSection(&threads_mutex);
+       mono_g_hash_table_insert(threads, (gpointer)(gsize)(thread->tid),
+                                thread);
+
+       mono_threads_unlock ();
+
+       return TRUE;
 }
 
-static void handle_remove(guint32 tid)
+static gboolean handle_remove(MonoThread *thread)
 {
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": thread ID %d", tid));
+       gboolean ret;
+       gsize tid = thread->tid;
+
+       THREAD_DEBUG (g_message ("%s: thread ID %"G_GSIZE_FORMAT, __func__, tid));
 
-       EnterCriticalSection(&threads_mutex);
+       mono_threads_lock ();
 
-       if (threads)
-               mono_g_hash_table_remove (threads, GUINT_TO_POINTER(tid));
+       if (threads) {
+               /* We have to check whether the thread object for the
+                * tid is still the same in the table because the
+                * thread might have been destroyed and the tid reused
+                * in the meantime, in which case the tid would be in
+                * the table, but with another thread object.
+                */
+               if (mono_g_hash_table_lookup (threads, (gpointer)tid) == thread) {
+                       mono_g_hash_table_remove (threads, (gpointer)tid);
+                       ret = TRUE;
+               } else {
+                       ret = FALSE;
+               }
+       }
+       else
+               ret = FALSE;
        
-       LeaveCriticalSection(&threads_mutex);
+       mono_threads_unlock ();
 
        /* Don't close the handle here, wait for the object finalizer
         * to do it. Otherwise, the following race condition applies:
@@ -192,31 +285,260 @@ static void handle_remove(guint32 tid)
         * thread calling Join() still has a reference to the first
         * thread's object.
         */
+       return ret;
+}
+
+/*
+ * Allocate a small thread id.
+ *
+ * Picks a free slot in small_id_table (growing the table when every slot is
+ * taken), stores THREAD in that slot and the slot index in thread->small_id,
+ * and makes sure hazard_table has a writable entry for that index.  All of
+ * this runs with small_id_mutex held.
+ *
+ * Returns the allocated id.
+ *
+ * FIXME: The biggest part of this function is very similar to
+ * domain_id_alloc() in domain.c and should be merged.
+ */
+static int
+small_id_alloc (MonoThread *thread)
+{
+       int id = -1, i;
+
+       EnterCriticalSection (&small_id_mutex);
+
+       /* Lazily create the table on first use. */
+       if (!small_id_table) {
+               small_id_table_size = 2;
+               small_id_table = mono_gc_alloc_fixed (small_id_table_size * sizeof (MonoThread*), NULL);
+       }
+       /* First pass: look for a free slot from small_id_next to the end. */
+       for (i = small_id_next; i < small_id_table_size; ++i) {
+               if (!small_id_table [i]) {
+                       id = i;
+                       break;
+               }
+       }
+       /* Second pass: wrap around and scan the slots before small_id_next. */
+       if (id == -1) {
+               for (i = 0; i < small_id_next; ++i) {
+                       if (!small_id_table [i]) {
+                               id = i;
+                               break;
+                       }
+               }
+       }
+       /* No free slot: double the table (it must stay below 1 << 16
+        * entries) and hand out the first slot of the new half. */
+       if (id == -1) {
+               MonoThread **new_table;
+               int new_size = small_id_table_size * 2;
+               if (new_size >= (1 << 16))
+                       g_assert_not_reached ();
+               id = small_id_table_size;
+               new_table = mono_gc_alloc_fixed (new_size * sizeof (MonoThread*), NULL);
+               memcpy (new_table, small_id_table, small_id_table_size * sizeof (void*));
+               mono_gc_free_fixed (small_id_table);
+               small_id_table = new_table;
+               small_id_table_size = new_size;
+       }
+       thread->small_id = id;
+       g_assert (small_id_table [id] == NULL);
+       small_id_table [id] = thread;
+       small_id_next++;
+       if (small_id_next > small_id_table_size)
+               small_id_next = 0;
+
+       /* The hazard table is one address range sized for
+        * HAZARD_TABLE_MAX_SIZE entries, mapped with MONO_MMAP_NONE on first
+        * use.  Whenever an id falls beyond the currently usable part, the
+        * page right after that part is switched to read/write, extending
+        * the table by one page worth of entries.
+        * NOTE(review): this relies on a new id never lying more than one
+        * page past the current end; the g_assert (id < hazard_table_size)
+        * below checks that. */
+       if (id >= hazard_table_size) {
+               gpointer page_addr;
+               int pagesize = mono_pagesize ();
+               /* Number of pages covered by the entries usable so far. */
+               int num_pages = (hazard_table_size * sizeof (MonoThreadHazardPointers) + pagesize - 1) / pagesize;
+
+               if (hazard_table == NULL) {
+                       hazard_table = mono_valloc (NULL,
+                               sizeof (MonoThreadHazardPointers) * HAZARD_TABLE_MAX_SIZE,
+                               MONO_MMAP_NONE);
+               }
+
+               g_assert (hazard_table != NULL);
+               page_addr = (guint8*)hazard_table + num_pages * pagesize;
+
+               g_assert (id < HAZARD_TABLE_MAX_SIZE);
+
+               mono_mprotect (page_addr, pagesize, MONO_MMAP_READ | MONO_MMAP_WRITE);
+
+               ++num_pages;
+               hazard_table_size = num_pages * pagesize / sizeof (MonoThreadHazardPointers);
+
+               g_assert (id < hazard_table_size);
+
+               hazard_table [id].hazard_pointers [0] = NULL;
+               hazard_table [id].hazard_pointers [1] = NULL;
+       }
+
+       /* highest_small_id is read without this mutex by
+        * is_pointer_hazardous (); the store is followed by a write
+        * barrier. */
+       if (id > highest_small_id) {
+               highest_small_id = id;
+               mono_memory_write_barrier ();
+       }
+
+       LeaveCriticalSection (&small_id_mutex);
+
+       return id;
+}
+
+/*
+ * Release a small thread id previously handed out by small_id_alloc ().
+ *
+ * ID must be a valid index into small_id_table and its slot must be in use;
+ * both conditions are asserted.
+ *
+ * small_id_mutex is held while the table is inspected and modified:
+ * small_id_alloc () may replace and free small_id_table (and change
+ * small_id_table_size) when it grows the table, so touching either without
+ * the lock could write into a table that has just been freed.
+ */
+static void
+small_id_free (int id)
+{
+       EnterCriticalSection (&small_id_mutex);
+
+       g_assert (id >= 0 && id < small_id_table_size);
+       g_assert (small_id_table [id] != NULL);
+
+       small_id_table [id] = NULL;
+
+       LeaveCriticalSection (&small_id_mutex);
+}
+
+/*
+ * Tell whether P is currently stored in one of the two hazard pointer slots
+ * of any hazard_table entry.  Only the entries 0..highest_small_id are
+ * examined; highest_small_id is sampled once at the start.
+ *
+ * Returns TRUE if a match is found, FALSE otherwise.
+ */
+static gboolean
+is_pointer_hazardous (gpointer p)
+{
+       int last_id = highest_small_id;
+       int slot = 0;
+
+       g_assert (last_id < hazard_table_size);
+
+       while (slot <= last_id) {
+               if (hazard_table [slot].hazard_pointers [0] == p)
+                       return TRUE;
+               if (hazard_table [slot].hazard_pointers [1] == p)
+                       return TRUE;
+               ++slot;
+       }
+
+       return FALSE;
+}
+
+/*
+ * Return the hazard pointer entry belonging to the calling thread, i.e. the
+ * hazard_table slot indexed by its small id.
+ *
+ * When there is no current MonoThread, or its small id is negative, a
+ * warning is logged and the address of a static fallback entry is returned
+ * instead.
+ */
+MonoThreadHazardPointers*
+mono_hazard_pointer_get (void)
+{
+       static MonoThreadHazardPointers emerg_hazard_table;
+       MonoThread *self = mono_thread_current ();
+
+       if (self == NULL || self->small_id < 0) {
+               g_warning ("Thread %p may have been prematurely finalized", self);
+               return &emerg_hazard_table;
+       }
+
+       return &hazard_table [self->small_id];
+}
+
+/*
+ * Try to release the entry at position INDEX of delayed_free_table.
+ *
+ * If the entry exists and its pointer is no longer hazardous, the entry is
+ * removed from the table (under delayed_free_table_mutex) and its free
+ * function is invoked after the lock has been dropped.  Otherwise nothing
+ * happens.
+ */
+static void
+try_free_delayed_free_item (int index)
+{
+       DelayedFreeItem candidate = { NULL, NULL };
+
+       /* Unlocked pre-check: nothing to do if the table is too short. */
+       if (delayed_free_table->len <= index)
+               return;
+
+       EnterCriticalSection (&delayed_free_table_mutex);
+       /* Re-check under the lock, since another thread may have shrunk
+          the table in the meantime. */
+       if (delayed_free_table->len > index) {
+               candidate = g_array_index (delayed_free_table, DelayedFreeItem, index);
+
+               if (is_pointer_hazardous (candidate.p))
+                       candidate.p = NULL;
+               else
+                       g_array_remove_index_fast (delayed_free_table, index);
+       }
+       LeaveCriticalSection (&delayed_free_table_mutex);
+
+       /* A non-NULL pointer means the entry was taken out of the table. */
+       if (candidate.p != NULL)
+               candidate.free_func (candidate.p);
+}
+
+/*
+ * Free P with FREE_FUNC right away unless some thread holds a hazard
+ * pointer to it; in that case queue the pair in delayed_free_table so it can
+ * be released later.
+ *
+ * Before looking at P, up to three already queued entries (positions 2, 1
+ * and 0, in that order) are given a chance to be freed.
+ */
+void
+mono_thread_hazardous_free_or_queue (gpointer p, MonoHazardousFreeFunc free_func)
+{
+       DelayedFreeItem pending = { p, free_func };
+       int slot = 3;
+
+       /* Opportunistically drain a few delayed entries first. */
+       while (slot-- > 0)
+               try_free_delayed_free_item (slot);
+
+       /* Nobody guards the pointer: release it immediately. */
+       if (!is_pointer_hazardous (p)) {
+               free_func (p);
+               return;
+       }
+
+       /* Still guarded: remember it for a later attempt. */
+       ++mono_stats.hazardous_pointer_count;
+
+       EnterCriticalSection (&delayed_free_table_mutex);
+       g_array_append_val (delayed_free_table, pending);
+       LeaveCriticalSection (&delayed_free_table_mutex);
+}
+
+/*
+ * Attempt to free every entry currently queued in delayed_free_table,
+ * walking from the last position down to the first.  Does nothing when the
+ * table has not been created.
+ */
+void
+mono_thread_hazardous_try_free_all (void)
+{
+       int idx;
+
+       if (delayed_free_table == NULL)
+               return;
+
+       idx = delayed_free_table->len;
+       while (--idx >= 0)
+               try_free_delayed_free_item (idx);
+}
+
+/*
+ * Make sure THREAD has a synch_cs critical section, creating one if the
+ * field is still NULL.  The new section is installed with a compare and
+ * exchange, so if another thread installs one first, the local copy is
+ * destroyed and freed again.
+ */
+static void ensure_synch_cs_set (MonoThread *thread)
+{
+       CRITICAL_SECTION *fresh_cs;
+
+       if (thread->synch_cs == NULL) {
+               fresh_cs = g_new0 (CRITICAL_SECTION, 1);
+               InitializeCriticalSection (fresh_cs);
+
+               if (InterlockedCompareExchangePointer ((gpointer *)&thread->synch_cs,
+                                                      fresh_cs, NULL) != NULL) {
+                       /* Lost the race: somebody else installed a CS */
+                       DeleteCriticalSection (fresh_cs);
+                       g_free (fresh_cs);
+               }
+       }
 }
 
+/*
+ * NOTE: this function can be called also for threads different from the current one:
+ * make sure no code called from it will ever assume it is run on the thread that is
+ * getting cleaned up.
+ *
+ * Tears down the runtime state of THREAD (which must not be NULL):
+ *  - removes it from the 'threads' table via handle_remove (); if it was not
+ *    in the table the cleanup has already happened and nothing else is done;
+ *  - releases its type locks;
+ *  - under thread->synch_cs, sets ThreadState_Stopped and clears
+ *    ThreadState_Background;
+ *  - notifies the profiler that the thread ended;
+ *  - pops the appdomain reference, but only when THREAD is the calling thread;
+ *  - frees serialized_culture_info, name and static_data and drops
+ *    cached_culture_info;
+ *  - invokes the registered cleanup callback, if any;
+ *  - releases the small id and sets thread->small_id to -2.
+ */
 static void thread_cleanup (MonoThread *thread)
 {
-       mono_release_type_locks (thread);
+       g_assert (thread != NULL);
 
-       if (!mono_monitor_enter (thread->synch_lock))
+       /* if the thread is not in the hash it has been removed already */
+       if (!handle_remove (thread))
                return;
+       mono_release_type_locks (thread);
+
+       EnterCriticalSection (thread->synch_cs);
 
        thread->state |= ThreadState_Stopped;
-       mono_monitor_exit (thread->synch_lock);
+       thread->state &= ~ThreadState_Background;
 
+       LeaveCriticalSection (thread->synch_cs);
+       
        mono_profiler_thread_end (thread->tid);
-       handle_remove (thread->tid);
 
-       mono_thread_pop_appdomain_ref ();
+       if (thread == mono_thread_current ())
+               mono_thread_pop_appdomain_ref ();
 
        if (thread->serialized_culture_info)
                g_free (thread->serialized_culture_info);
 
-       mono_gc_free_fixed (thread->culture_info);
-       mono_gc_free_fixed (thread->ui_culture_info);
+       g_free (thread->name);
+
+       thread->cached_culture_info = NULL;
 
-       if (mono_thread_cleanup)
-               mono_thread_cleanup (thread);
+       mono_gc_free_fixed (thread->static_data);
+       thread->static_data = NULL;
+
+       if (mono_thread_cleanup_fn)
+               mono_thread_cleanup_fn (thread);
+
+       small_id_free (thread->small_id);
+       thread->small_id = -2;
 }
 
 static guint32 WINAPI start_wrapper(void *data)
@@ -224,12 +546,12 @@ static guint32 WINAPI start_wrapper(void *data)
        struct StartInfo *start_info=(struct StartInfo *)data;
        guint32 (*start_func)(void *);
        void *start_arg;
-       guint32 tid;
+       gsize tid;
        MonoThread *thread=start_info->obj;
        MonoObject *start_delegate = start_info->delegate;
 
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": (%d) Start wrapper", GetCurrentThreadId ()));
-       
+       THREAD_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") Start wrapper", __func__, GetCurrentThreadId ()));
+
        /* We can be sure start_info->obj->tid and
         * start_info->obj->handle have been set, because the thread
         * was created suspended, and these values were set before the
@@ -239,10 +561,16 @@ static guint32 WINAPI start_wrapper(void *data)
        tid=thread->tid;
 
        SET_CURRENT_OBJECT (thread);
+
+       mono_monitor_init_tls ();
+
+       /* Every thread references the appdomain which created it */
+       mono_thread_push_appdomain_ref (start_info->domain);
        
        if (!mono_domain_set (start_info->domain, FALSE)) {
                /* No point in raising an appdomain_unloaded exception here */
                /* FIXME: Cleanup here */
+               mono_thread_pop_appdomain_ref ();
                return 0;
        }
 
@@ -256,34 +584,38 @@ static guint32 WINAPI start_wrapper(void *data)
        mono_thread_new_init (tid, &tid, start_func);
        thread->stack_ptr = &tid;
 
-       LIBGC_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION
-                  ": (%d,%d) Setting thread stack to %p",
-                  GetCurrentThreadId (), getpid (), thread->stack_ptr));
+       LIBGC_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT",%d) Setting thread stack to %p", __func__, GetCurrentThreadId (), getpid (), thread->stack_ptr));
 
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION
-                  ": (%d) Setting current_object_key to %p",
-                  GetCurrentThreadId (), thread));
+       THREAD_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") Setting current_object_key to %p", __func__, GetCurrentThreadId (), thread));
 
        mono_profiler_thread_start (tid);
 
+       /* On 2.0 profile (and higher), set explicitly since state might have been
+          Unknown */
+       if (mono_framework_version () != 1) {
+               if (thread->apartment_state == ThreadApartmentState_Unknown)
+                       thread->apartment_state = ThreadApartmentState_MTA;
+       }
+
+       mono_thread_init_apartment_state ();
+
        if(thread->start_notify!=NULL) {
                /* Let the thread that called Start() know we're
                 * ready
                 */
                ReleaseSemaphore (thread->start_notify, 1, NULL);
        }
-       
-       g_free (start_info);
 
-       /* Every thread references the appdomain which created it */
-       mono_thread_push_appdomain_ref (mono_domain_get ());
+       MONO_GC_UNREGISTER_ROOT (start_info->start_arg);
+       g_free (start_info);
 
        thread_adjust_static_data (thread);
 #ifdef DEBUG
-       g_message (G_GNUC_PRETTY_FUNCTION "start_wrapper for %d\n", thread->tid);
+       g_message ("%s: start_wrapper for %"G_GSIZE_FORMAT, __func__,
+                  thread->tid);
 #endif
 
-       /* start_func is set only for unamanged start functions */
+       /* start_func is set only for unmanaged start functions */
        if (start_func) {
                start_func (start_arg);
        } else {
@@ -299,8 +631,16 @@ static guint32 WINAPI start_wrapper(void *data)
         * call thread_cleanup() on this thread's behalf.
         */
 
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": (%d) Start wrapper terminating",
-                 GetCurrentThreadId ()));
+       THREAD_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") Start wrapper terminating", __func__, GetCurrentThreadId ()));
+
+       thread_cleanup (thread);
+
+       /* Do any cleanup needed for apartment state. This
+        * cannot be done in thread_cleanup since thread_cleanup could be 
+        * called for a thread other than the current thread.
+        * mono_thread_cleanup_apartment_state cleans up apartment
+        * for the current thread */
+       mono_thread_cleanup_apartment_state ();
 
        /* Remove the reference to the thread object in the TLS data,
         * so the thread object can be finalized.  This won't be
@@ -312,19 +652,14 @@ static guint32 WINAPI start_wrapper(void *data)
         */
        SET_CURRENT_OBJECT (NULL);
 
-       thread_cleanup (thread);
-
        return(0);
 }
 
+/*
+ * Notify the registered thread start callback, if any, about a new thread.
+ * TID, STACK_START and FUNC are passed through to mono_thread_start_cb
+ * unchanged.
+ */
-void mono_thread_new_init (guint32 tid, gpointer stack_start, gpointer func)
+void mono_thread_new_init (gsize tid, gpointer stack_start, gpointer func)
 {
        if (mono_thread_start_cb) {
                mono_thread_start_cb (tid, stack_start, func);
        }
-
-       if (mono_thread_callbacks)
-               (* mono_thread_callbacks->thread_created) (tid, stack_start, func);
 }
 
 void mono_threads_set_default_stacksize (guint32 stacksize)
@@ -337,13 +672,13 @@ guint32 mono_threads_get_default_stacksize (void)
        return default_stacksize;
 }
 
-void mono_thread_create (MonoDomain *domain, gpointer func, gpointer arg)
+void mono_thread_create_internal (MonoDomain *domain, gpointer func, gpointer arg, gboolean threadpool_thread)
 {
        MonoThread *thread;
        HANDLE thread_handle;
        struct StartInfo *start_info;
-       guint32 tid;
-       
+       gsize tid;
+
        thread=(MonoThread *)mono_object_new (domain,
                                              mono_defaults.thread_class);
 
@@ -352,44 +687,128 @@ void mono_thread_create (MonoDomain *domain, gpointer func, gpointer arg)
        start_info->obj = thread;
        start_info->domain = domain;
        start_info->start_arg = arg;
-       
+
+       /* 
+        * The argument may be an object reference, and there is no ref to keep it alive
+        * when the new thread is started but not yet registered with the collector.
+        */
+       MONO_GC_REGISTER_ROOT (start_info->start_arg);
+
+       mono_threads_lock ();
+       if (shutting_down) {
+               mono_threads_unlock ();
+               return;
+       }
+       if (threads_starting_up == NULL) {
+               MONO_GC_REGISTER_ROOT (threads_starting_up);
+               threads_starting_up = mono_g_hash_table_new (NULL, NULL);
+       }
+       mono_g_hash_table_insert (threads_starting_up, thread, thread);
+       mono_threads_unlock (); 
+
        /* Create suspended, so we can do some housekeeping before the thread
         * starts
         */
        thread_handle = CreateThread(NULL, default_stacksize_for_thread (thread), (LPTHREAD_START_ROUTINE)start_wrapper, start_info,
                                     CREATE_SUSPENDED, &tid);
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": Started thread ID %d (handle %p)",
-                 tid, thread_handle));
+       THREAD_DEBUG (g_message ("%s: Started thread ID %"G_GSIZE_FORMAT" (handle %p)", __func__, tid, thread_handle));
        if (thread_handle == NULL) {
                /* The thread couldn't be created, so throw an exception */
+               MONO_GC_UNREGISTER_ROOT (start_info->start_arg);
+               mono_threads_lock ();
+               mono_g_hash_table_remove (threads_starting_up, thread);
+               mono_threads_unlock ();
+               g_free (start_info);
                mono_raise_exception (mono_get_exception_execution_engine ("Couldn't create thread"));
                return;
        }
 
        thread->handle=thread_handle;
        thread->tid=tid;
+       thread->apartment_state=ThreadApartmentState_Unknown;
+       small_id_alloc (thread);
+
+       thread->synch_cs = g_new0 (CRITICAL_SECTION, 1);
+       InitializeCriticalSection (thread->synch_cs);
 
-       thread->synch_lock=mono_object_new (domain, mono_defaults.object_class);
-                                                 
-       handle_store(thread);
+       thread->threadpool_thread = threadpool_thread;
+       if (threadpool_thread)
+               mono_thread_set_state (thread, ThreadState_Background);
 
-       ResumeThread (thread_handle);
+       if (handle_store (thread))
+               ResumeThread (thread_handle);
 }
 
+/*
+ * Create a thread in DOMAIN running FUNC with argument ARG.  This simply
+ * forwards to mono_thread_create_internal () with threadpool_thread set to
+ * FALSE.
+ */
+void
+mono_thread_create (MonoDomain *domain, gpointer func, gpointer arg)
+{
+       mono_thread_create_internal (domain, func, arg, FALSE);
+}
+
+/*
+ * mono_thread_get_stack_bounds:
+ *
+ *   Return the address and size of the current thread's stack. Return NULL as the
+ * stack address if the stack address cannot be determined.
+ *
+ * STADDR receives the stack address, rounded down to a page boundary.
+ * STSIZE receives the stack size; it is set to 0 on platforms where no
+ * query mechanism is available.  Both out parameters are always written.
+ */
+void
+mono_thread_get_stack_bounds (guint8 **staddr, size_t *stsize)
+{
+#if defined(HAVE_PTHREAD_GET_STACKSIZE_NP) && defined(HAVE_PTHREAD_GET_STACKADDR_NP)
+       *staddr = (guint8*)pthread_get_stackaddr_np (pthread_self ());
+       *stsize = pthread_get_stacksize_np (pthread_self ());
+       *staddr = (guint8*)((gssize)*staddr & ~(mono_pagesize () - 1));
+       return;
+       /* FIXME: simplify the mess below */
+#elif !defined(PLATFORM_WIN32)
+       pthread_attr_t attr;
+       guint8 *current = (guint8*)&attr;
+
+       pthread_attr_init (&attr);
+#ifdef HAVE_PTHREAD_GETATTR_NP
+       pthread_getattr_np (pthread_self(), &attr);
+#else
+#ifdef HAVE_PTHREAD_ATTR_GET_NP
+       pthread_attr_get_np (pthread_self(), &attr);
+#elif defined(sun)
+       *staddr = NULL;
+       /* stsize already is the size_t* the API expects; passing its
+        * address would overwrite the local pointer instead of *stsize. */
+       pthread_attr_getstacksize (&attr, stsize);
+#else
+       *staddr = NULL;
+       *stsize = 0;
+       /* Release the attribute object before bailing out. */
+       pthread_attr_destroy (&attr);
+       return;
+#endif
+#endif
+
+#ifndef sun
+       pthread_attr_getstack (&attr, (void**)staddr, stsize);
+       /* Sanity check: a local variable must live inside the reported stack. */
+       if (*staddr)
+               g_assert ((current > *staddr) && (current < *staddr + *stsize));
+#endif
+
+       pthread_attr_destroy (&attr);
+#else
+       /* No way to query the stack here: report "unknown" explicitly so the
+        * alignment below does not read an uninitialized *staddr. */
+       *staddr = NULL;
+       *stsize = 0;
+#endif
+
+       /* When running under emacs, sometimes staddr is not aligned to a page size */
+       *staddr = (guint8*)((gssize)*staddr & ~(mono_pagesize () - 1));
+}
+
 MonoThread *
 mono_thread_attach (MonoDomain *domain)
 {
        MonoThread *thread;
        HANDLE thread_handle;
-       guint32 tid;
+       gsize tid;
 
        if ((thread = mono_thread_current ())) {
+               if (domain != mono_domain_get ())
+                       mono_domain_set (domain, TRUE);
                /* Already attached */
                return thread;
        }
 
-       if (!mono_gc_is_gc_thread ()) {
-               g_error ("Thread %p calling into managed code is not registered with the GC. On UNIX, this can be fixed by #include-ing <gc.h> before <pthread.h> in the file containing the thread creation code.", GetCurrentThread ());
+       if (!mono_gc_register_thread (&domain)) {
+               g_error ("Thread %"G_GSIZE_FORMAT" calling into managed code is not registered with the GC. On UNIX, this can be fixed by #include-ing <gc.h> before <pthread.h> in the file containing the thread creation code.", GetCurrentThreadId ());
        }
 
        thread = (MonoThread *)mono_object_new (domain,
@@ -400,34 +819,49 @@ mono_thread_attach (MonoDomain *domain)
 
        tid=GetCurrentThreadId ();
 
-#ifdef PLATFORM_WIN32
        /* 
         * The handle returned by GetCurrentThread () is a pseudo handle, so it can't be used to
         * refer to the thread from other threads for things like aborting.
         */
        DuplicateHandle (GetCurrentProcess (), thread_handle, GetCurrentProcess (), &thread_handle, 
                                         THREAD_ALL_ACCESS, TRUE, 0);
-#endif
 
        thread->handle=thread_handle;
        thread->tid=tid;
-       thread->synch_lock=mono_object_new (domain, mono_defaults.object_class);
+       thread->apartment_state=ThreadApartmentState_Unknown;
+       small_id_alloc (thread);
+       thread->stack_ptr = &tid;
 
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": Attached thread ID %d (handle %p)",
-                 tid, thread_handle));
+       thread->synch_cs = g_new0 (CRITICAL_SECTION, 1);
+       InitializeCriticalSection (thread->synch_cs);
 
-       handle_store(thread);
+       THREAD_DEBUG (g_message ("%s: Attached thread ID %"G_GSIZE_FORMAT" (handle %p)", __func__, tid, thread_handle));
 
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": (%d) Setting current_object_key to %p",
-                  GetCurrentThreadId (), thread));
+       if (!handle_store (thread)) {
+               /* Mono is shutting down, so just wait for the end */
+               for (;;)
+                       Sleep (10000);
+       }
+
+       THREAD_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") Setting current_object_key to %p", __func__, GetCurrentThreadId (), thread));
 
        SET_CURRENT_OBJECT (thread);
        mono_domain_set (domain, TRUE);
 
+       mono_monitor_init_tls ();
+
        thread_adjust_static_data (thread);
 
        if (mono_thread_attach_cb) {
-               mono_thread_attach_cb (tid, &tid);
+               guint8 *staddr;
+               size_t stsize;
+
+               mono_thread_get_stack_bounds (&staddr, &stsize);
+
+               if (staddr == NULL)
+                       mono_thread_attach_cb (tid, &tid);
+               else
+                       mono_thread_attach_cb (tid, staddr + stsize);
        }
 
        return(thread);
@@ -438,10 +872,16 @@ mono_thread_detach (MonoThread *thread)
 {
        g_return_if_fail (thread != NULL);
 
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION "mono_thread_detach for %d\n", thread->tid));
-       SET_CURRENT_OBJECT (NULL);
+       THREAD_DEBUG (g_message ("%s: mono_thread_detach for %p (%"G_GSIZE_FORMAT")", __func__, thread, (gsize)thread->tid));
        
        thread_cleanup (thread);
+
+       SET_CURRENT_OBJECT (NULL);
+
+       /* Don't need to CloseHandle this thread, even though we took a
+        * reference in mono_thread_attach (), because the GC will do it
+        * when the Thread object is finalised.
+        */
 }
 
 void
@@ -449,8 +889,10 @@ mono_thread_exit ()
 {
        MonoThread *thread = mono_thread_current ();
 
-       SET_CURRENT_OBJECT (NULL);
+       THREAD_DEBUG (g_message ("%s: mono_thread_exit for %p (%"G_GSIZE_FORMAT")", __func__, thread, (gsize)thread->tid));
+
        thread_cleanup (thread);
+       SET_CURRENT_OBJECT (NULL);
 
        /* we could add a callback here for embedders to use. */
        if (thread == mono_thread_get_main ())
@@ -463,64 +905,68 @@ HANDLE ves_icall_System_Threading_Thread_Thread_internal(MonoThread *this,
 {
        guint32 (*start_func)(void *);
        struct StartInfo *start_info;
-       MonoMethod *im;
        HANDLE thread;
-       guint32 tid;
+       gsize tid;
        
        MONO_ARCH_SAVE_REGS;
 
-       THREAD_DEBUG (g_message(G_GNUC_PRETTY_FUNCTION
-                 ": Trying to start a new thread: this (%p) start (%p)", this, start));
+       THREAD_DEBUG (g_message("%s: Trying to start a new thread: this (%p) start (%p)", __func__, this, start));
 
-/* FIXME: remove the code inside BROKEN_THREAD_START once martin gets rid of the
- * thread_start_compile_func stuff.
- */
-#define BROKEN_THREAD_START
-#ifdef BROKEN_THREAD_START
-       im = mono_get_delegate_invoke (start->vtable->klass);
-       im = mono_marshal_get_delegate_invoke (im);
-       if (mono_thread_callbacks)
-               start_func = (* mono_thread_callbacks->thread_start_compile_func) (im);
-       else
-               start_func = mono_compile_method (im);
+       ensure_synch_cs_set (this);
 
-       if(start_func==NULL) {
-               g_warning(G_GNUC_PRETTY_FUNCTION
-                         ": Can't locate start method!");
-               return(NULL);
-       } else {
-#else
+       EnterCriticalSection (this->synch_cs);
+
+       if ((this->state & ThreadState_Unstarted) == 0) {
+               LeaveCriticalSection (this->synch_cs);
+               mono_raise_exception (mono_get_exception_thread_state ("Thread has already been started."));
+               return NULL;
+       }
+
+       this->small_id = -1;
+
+       if ((this->state & ThreadState_Aborted) != 0) {
+               LeaveCriticalSection (this->synch_cs);
+               return this;
+       }
        start_func = NULL;
        {
-#endif
                /* This is freed in start_wrapper */
                start_info = g_new0 (struct StartInfo, 1);
                start_info->func = start_func;
-#ifdef BROKEN_THREAD_START
-               start_info->start_arg = start;
-#else
-               start_info->start_arg = this->start_obj;
-#endif
+               start_info->start_arg = this->start_obj; /* FIXME: GC object stored in unmanaged memory */
                start_info->delegate = start;
                start_info->obj = this;
                start_info->domain = mono_domain_get ();
 
                this->start_notify=CreateSemaphore (NULL, 0, 0x7fffffff, NULL);
                if(this->start_notify==NULL) {
-                       g_warning (G_GNUC_PRETTY_FUNCTION ": CreateSemaphore error 0x%x", GetLastError ());
+                       LeaveCriticalSection (this->synch_cs);
+                       g_warning ("%s: CreateSemaphore error 0x%x", __func__, GetLastError ());
                        return(NULL);
                }
 
+               mono_threads_lock ();
+               if (threads_starting_up == NULL) {
+                       MONO_GC_REGISTER_ROOT (threads_starting_up);
+                       threads_starting_up = mono_g_hash_table_new (NULL, NULL);
+               }
+               mono_g_hash_table_insert (threads_starting_up, this, this);
+               mono_threads_unlock (); 
+
                thread=CreateThread(NULL, default_stacksize_for_thread (this), (LPTHREAD_START_ROUTINE)start_wrapper, start_info,
                                    CREATE_SUSPENDED, &tid);
                if(thread==NULL) {
-                       g_warning(G_GNUC_PRETTY_FUNCTION
-                                 ": CreateThread error 0x%x", GetLastError());
+                       LeaveCriticalSection (this->synch_cs);
+                       mono_threads_lock ();
+                       mono_g_hash_table_remove (threads_starting_up, this);
+                       mono_threads_unlock ();
+                       g_warning("%s: CreateThread error 0x%x", __func__, GetLastError());
                        return(NULL);
                }
                
                this->handle=thread;
                this->tid=tid;
+               small_id_alloc (this);
 
                /* Don't call handle_store() here, delay it to Start.
                 * We can't join a thread (trying to will just block
@@ -528,65 +974,68 @@ HANDLE ves_icall_System_Threading_Thread_Thread_internal(MonoThread *this,
                 * store the handle till then.
                 */
 
-               THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION
-                         ": Started thread ID %d (handle %p)", tid, thread));
+               mono_thread_start (this);
+               
+               this->state &= ~ThreadState_Unstarted;
+
+               THREAD_DEBUG (g_message ("%s: Started thread ID %"G_GSIZE_FORMAT" (handle %p)", __func__, tid, thread));
 
+               LeaveCriticalSection (this->synch_cs);
                return(thread);
        }
 }
 
+/*
+ * ves_icall_System_Threading_Thread_Thread_init:
+ * @this: the managed thread object
+ *
+ * Calls ensure_synch_cs_set () on @this.  The other icalls in this file
+ * call the same helper before entering this->synch_cs; the helper itself
+ * is defined elsewhere (presumably it creates the critical section when
+ * it is missing -- confirm there).
+ */
+void ves_icall_System_Threading_Thread_Thread_init (MonoThread *this)
+{
+       MONO_ARCH_SAVE_REGS;
+
+       ensure_synch_cs_set (this);
+}
+
+/*
+ * ves_icall_System_Threading_Thread_Thread_free_internal:
+ * @this: the thread object being torn down
+ * @thread: the OS handle belonging to that thread object
+ *
+ * Closes @thread and releases the critical section owned by @this.
+ * this->synch_cs is reset to NULL afterwards and a NULL synch_cs is
+ * skipped, so a thread object whose critical section was never set up
+ * (or was already released by an earlier call) is handled without
+ * dereferencing a NULL pointer.
+ */
 void ves_icall_System_Threading_Thread_Thread_free_internal (MonoThread *this,
                                                             HANDLE thread)
 {
        MONO_ARCH_SAVE_REGS;
 
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": Closing thread %p, handle %p",
-                  this, thread));
+       THREAD_DEBUG (g_message ("%s: Closing thread %p, handle %p", __func__, this, thread));
 
        CloseHandle (thread);
+
+       /* DeleteCriticalSection () must never be handed a NULL pointer */
+       if (this->synch_cs != NULL) {
+               DeleteCriticalSection (this->synch_cs);
+               g_free (this->synch_cs);
+               this->synch_cs = NULL;
+       }
 }
 
-void ves_icall_System_Threading_Thread_Start_internal(MonoThread *this,
-                                                     HANDLE thread)
+/*
+ * mono_thread_start:
+ * @thread: thread object whose OS thread (thread->handle) is to be resumed
+ *
+ * Registers @thread through handle_store () and, when that succeeds,
+ * resumes thread->handle.  If thread->start_notify is set, blocks until it
+ * is signalled, then closes it and resets the field to NULL.  When
+ * handle_store () fails the function returns without resuming the thread
+ * and without touching start_notify.
+ */
+static void mono_thread_start (MonoThread *thread)
 {
        MONO_ARCH_SAVE_REGS;
 
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": (%d) Launching thread %p (%d)",
-                 GetCurrentThreadId (), this, this->tid));
+       THREAD_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") Launching thread %p (%"G_GSIZE_FORMAT")", __func__, GetCurrentThreadId (), thread, (gsize)thread->tid));
 
        /* Only store the handle when the thread is about to be
         * launched, to avoid the main thread deadlocking while trying
         * to clean up a thread that will never be signalled.
         */
-       handle_store(this);
-
-       if (mono_thread_callbacks)
-               (* mono_thread_callbacks->start_resume) (this->tid);
-
-       ResumeThread(thread);
+       /* Bail out without resuming when handle_store () fails; the attach
+        * path in this file treats the same failure as Mono shutting down.
+        */
+       if (!handle_store (thread))
+               return;
 
-       if (mono_thread_callbacks)
-               (* mono_thread_callbacks->end_resume) (this->tid);
+       ResumeThread (thread->handle);
 
-       if(this->start_notify!=NULL) {
+       if(thread->start_notify!=NULL) {
                /* Wait for the thread to set up its TLS data etc, so
                 * theres no potential race condition if someone tries
                 * to look up the data believing the thread has
                 * started
                 */
 
-               THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION
-                         ": (%d) waiting for thread %p (%d) to start",
-                         GetCurrentThreadId (), this, this->tid));
+               THREAD_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") waiting for thread %p (%"G_GSIZE_FORMAT") to start", __func__, GetCurrentThreadId (), thread, (gsize)thread->tid));
 
-               WaitForSingleObjectEx (this->start_notify, INFINITE, FALSE);
-               CloseHandle (this->start_notify);
-               this->start_notify=NULL;
+               WaitForSingleObjectEx (thread->start_notify, INFINITE, FALSE);
+               CloseHandle (thread->start_notify);
+               thread->start_notify = NULL;
        }
 
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION
-                 ": (%d) Done launching thread %p (%d)",
-                 GetCurrentThreadId (), this, this->tid));
+       THREAD_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") Done launching thread %p (%"G_GSIZE_FORMAT")", __func__, GetCurrentThreadId (), thread, (gsize)thread->tid));
 }
 
 void ves_icall_System_Threading_Thread_Sleep_internal(gint32 ms)
@@ -595,17 +1044,19 @@ void ves_icall_System_Threading_Thread_Sleep_internal(gint32 ms)
        
        MONO_ARCH_SAVE_REGS;
 
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": Sleeping for %d ms", ms));
+       THREAD_DEBUG (g_message ("%s: Sleeping for %d ms", __func__, ms));
 
-       mono_monitor_enter (thread->synch_lock);
-       thread->state |= ThreadState_WaitSleepJoin;
-       mono_monitor_exit (thread->synch_lock);
+       mono_thread_current_check_pending_interrupt ();
+       
+       mono_thread_set_state (thread, ThreadState_WaitSleepJoin);
        
        SleepEx(ms,TRUE);
        
-       mono_monitor_enter (thread->synch_lock);
-       thread->state &= ~ThreadState_WaitSleepJoin;
-       mono_monitor_exit (thread->synch_lock);
+       mono_thread_clr_state (thread, ThreadState_WaitSleepJoin);
+}
+
+/* Icall with an empty body: calling it does nothing. */
+void ves_icall_System_Threading_Thread_SpinWait_nop (void)
+{
 }
 
 gint32
@@ -619,17 +1070,35 @@ ves_icall_System_Threading_Thread_GetDomainID (void)
+/*
+ * ves_icall_System_Threading_Thread_GetName_internal:
+ * @this_obj: thread object whose name is requested
+ *
+ * Returns a new MonoString, created in the current domain from the
+ * name_len UTF-16 units stored in this_obj->name, or NULL when no name is
+ * stored.  The name fields are read while holding this_obj->synch_cs.
+ */
 MonoString* 
 ves_icall_System_Threading_Thread_GetName_internal (MonoThread *this_obj)
 {
+       MonoString* str;
+
+       ensure_synch_cs_set (this_obj);
+       
+       EnterCriticalSection (this_obj->synch_cs);
+       
        if (!this_obj->name)
-               return NULL;
+               str = NULL;
        else
-               return mono_string_new_utf16 (mono_domain_get (), this_obj->name, this_obj->name_len);
+               str = mono_string_new_utf16 (mono_domain_get (), this_obj->name, this_obj->name_len);
+       
+       LeaveCriticalSection (this_obj->synch_cs);
+       
+       return str;
 }
 
 void 
 ves_icall_System_Threading_Thread_SetName_internal (MonoThread *this_obj, MonoString *name)
 {
-       if (this_obj->name)
-               g_free (this_obj->name);
+       ensure_synch_cs_set (this_obj);
+       
+       EnterCriticalSection (this_obj->synch_cs);
+       
+       if (this_obj->name) {
+               LeaveCriticalSection (this_obj->synch_cs);
+               
+               mono_raise_exception (mono_get_exception_invalid_operation ("Thread.Name can only be set once."));
+               return;
+       }
        if (name) {
                this_obj->name = g_new (gunichar2, mono_string_length (name));
                memcpy (this_obj->name, mono_string_chars (name), mono_string_length (name) * 2);
@@ -637,20 +1106,20 @@ ves_icall_System_Threading_Thread_SetName_internal (MonoThread *this_obj, MonoSt
        }
        else
                this_obj->name = NULL;
+       
+       LeaveCriticalSection (this_obj->synch_cs);
 }
 
-MonoObject*
-ves_icall_System_Threading_Thread_GetCachedCurrentCulture (MonoThread *this)
+static MonoObject*
+lookup_cached_culture (MonoThread *this, MonoDomain *domain, int start_idx)
 {
        MonoObject *res;
-       MonoDomain *domain;
        int i;
 
-       /* No need to lock here */
-       if (this->culture_info) {
+       if (this->cached_culture_info) {
                domain = mono_domain_get ();
-               for (i = 0; i < NUM_CACHED_CULTURES; ++i) {
-                       res = this->culture_info [i];
+               for (i = start_idx; i < start_idx + NUM_CACHED_CULTURES; ++i) {
+                       res = mono_array_get (this->cached_culture_info, MonoObject*, i);
                        if (res && res->vtable->domain == domain)
                                return res;
                }
@@ -659,80 +1128,99 @@ ves_icall_System_Threading_Thread_GetCachedCurrentCulture (MonoThread *this)
        return NULL;
 }
 
+/*
+ * Returns the result of lookup_cached_culture () for @this and the
+ * current domain, searching the cache slots that start at
+ * CULTURES_START_IDX.
+ */
+MonoObject*
+ves_icall_System_Threading_Thread_GetCachedCurrentCulture (MonoThread *this)
+{
+       return lookup_cached_culture (this, mono_domain_get (), CULTURES_START_IDX);
+}
+
+/*
+ * ves_icall_System_Threading_Thread_GetSerializedCurrentCulture:
+ * @this: thread object to read from
+ *
+ * Returns a new byte array (mono_defaults.byte_class, current domain)
+ * holding a copy of the serialized_culture_info_len bytes stored in
+ * this->serialized_culture_info, or NULL when that field is unset.  The
+ * fields are read while holding this->synch_cs.
+ */
 MonoArray*
 ves_icall_System_Threading_Thread_GetSerializedCurrentCulture (MonoThread *this)
 {
        MonoArray *res;
 
-       mono_monitor_enter (this->synch_lock);
+       ensure_synch_cs_set (this);
+       
+       EnterCriticalSection (this->synch_cs);
+       
        if (this->serialized_culture_info) {
                res = mono_array_new (mono_domain_get (), mono_defaults.byte_class, this->serialized_culture_info_len);
                memcpy (mono_array_addr (res, guint8, 0), this->serialized_culture_info, this->serialized_culture_info_len);
-       }
-       else
+       } else {
                res = NULL;
-       mono_monitor_exit (this->synch_lock);
+       }
+
+       LeaveCriticalSection (this->synch_cs);
 
        return res;
 }
 
-void
-ves_icall_System_Threading_Thread_SetCachedCurrentCulture (MonoThread *this, MonoObject *culture)
+/*
+ * cache_culture:
+ * @this: thread object owning the cache
+ * @culture: object to store
+ * @start_idx: first slot of the NUM_CACHED_CULTURES-wide range to use
+ *
+ * Stores @culture in this->cached_culture_info, creating that array with
+ * NUM_CACHED_CULTURES * 2 entries on first use.  Within the range
+ * [start_idx, start_idx + NUM_CACHED_CULTURES) a slot already holding an
+ * object of the current domain is overwritten; otherwise the last empty
+ * slot seen is used; if neither exists nothing is stored.  The whole
+ * operation runs while holding this->synch_cs.
+ */
+static void
+cache_culture (MonoThread *this, MonoObject *culture, int start_idx)
 {
        int i;
        MonoDomain *domain = mono_domain_get ();
+       MonoObject *obj;
+       int free_slot = -1;
+       int same_domain_slot = -1;
 
-       mono_monitor_enter (this->synch_lock);
-       if (!this->culture_info) {
-               this->culture_info = mono_gc_alloc_fixed (sizeof (MonoObject*) * NUM_CACHED_CULTURES, NULL);
-       }
+       ensure_synch_cs_set (this);
+       
+       EnterCriticalSection (this->synch_cs);
+       
+       if (!this->cached_culture_info)
+               MONO_OBJECT_SETREF (this, cached_culture_info, mono_array_new (mono_object_domain (this), mono_defaults.object_class, NUM_CACHED_CULTURES * 2));
 
-       for (i = 0; i < NUM_CACHED_CULTURES; ++i) {
-               if (this->culture_info [i]) {
-                       if (this->culture_info [i]->vtable->domain == domain)
-                               /* Replace */
-                               break;
+       for (i = start_idx; i < start_idx + NUM_CACHED_CULTURES; ++i) {
+               obj = mono_array_get (this->cached_culture_info, MonoObject*, i);
+               /* Free entry */
+               if (!obj) {
+                       free_slot = i;
+                       /* we continue, because there may be a slot used with the same domain */
+                       continue;
                }
-               else
-                       /* Free entry */
+               /* Replace */
+               if (obj->vtable->domain == domain) {
+                       same_domain_slot = i;
                        break;
+               }
        }
-       if (i < NUM_CACHED_CULTURES)
-               this->culture_info [i] = culture;
-       mono_monitor_exit (this->synch_lock);
+       /* a same-domain slot takes precedence over an empty one */
+       if (same_domain_slot >= 0)
+               mono_array_setref (this->cached_culture_info, same_domain_slot, culture);
+       else if (free_slot >= 0)
+               mono_array_setref (this->cached_culture_info, free_slot, culture);
+       /* we may want to replace an existing entry here, even when no suitable slot is found */
+
+       LeaveCriticalSection (this->synch_cs);
+}
+
+/*
+ * Hands @culture to cache_culture () for @this, using the cache slots
+ * that start at CULTURES_START_IDX.
+ */
+void
+ves_icall_System_Threading_Thread_SetCachedCurrentCulture (MonoThread *this, MonoObject *culture)
+{
+       cache_culture (this, culture, CULTURES_START_IDX);
 }
 
+/*
+ * ves_icall_System_Threading_Thread_SetSerializedCurrentCulture:
+ * @this: thread object to update
+ * @arr: byte array with the serialized culture
+ *
+ * Frees any previous this->serialized_culture_info buffer and replaces it
+ * with a g_new0 () copy of @arr's bytes, recording the length in
+ * serialized_culture_info_len.  Runs while holding this->synch_cs.
+ * NOTE(review): @arr is used without a NULL check here.
+ */
 void
 ves_icall_System_Threading_Thread_SetSerializedCurrentCulture (MonoThread *this, MonoArray *arr)
 {
-       mono_monitor_enter (this->synch_lock);
+       ensure_synch_cs_set (this);
+       
+       EnterCriticalSection (this->synch_cs);
+       
        if (this->serialized_culture_info)
                g_free (this->serialized_culture_info);
        this->serialized_culture_info = g_new0 (guint8, mono_array_length (arr));
        this->serialized_culture_info_len = mono_array_length (arr);
        memcpy (this->serialized_culture_info, mono_array_addr (arr, guint8, 0), mono_array_length (arr));
-       mono_monitor_exit (this->synch_lock);
+
+       LeaveCriticalSection (this->synch_cs);
 }
 
 
+/*
+ * Returns the result of lookup_cached_culture () for @this and the
+ * current domain, searching the cache slots that start at
+ * UICULTURES_START_IDX.
+ */
 MonoObject*
 ves_icall_System_Threading_Thread_GetCachedCurrentUICulture (MonoThread *this)
 {
-       MonoObject *res;
-       MonoDomain *domain;
-       int i;
-
-       /* No need to lock here */
-       if (this->ui_culture_info) {
-               domain = mono_domain_get ();
-               for (i = 0; i < NUM_CACHED_CULTURES; ++i) {
-                       res = this->ui_culture_info [i];
-                       if (res && res->vtable->domain == domain)
-                               return res;
-               }
-       }
-
-       return NULL;
+       return lookup_cached_culture (this, mono_domain_get (), UICULTURES_START_IDX);
 }
 
 MonoArray*
@@ -740,14 +1228,18 @@ ves_icall_System_Threading_Thread_GetSerializedCurrentUICulture (MonoThread *thi
 {
        MonoArray *res;
 
-       mono_monitor_enter (this->synch_lock);
+       ensure_synch_cs_set (this);
+       
+       EnterCriticalSection (this->synch_cs);
+       
        if (this->serialized_ui_culture_info) {
                res = mono_array_new (mono_domain_get (), mono_defaults.byte_class, this->serialized_ui_culture_info_len);
                memcpy (mono_array_addr (res, guint8, 0), this->serialized_ui_culture_info, this->serialized_ui_culture_info_len);
-       }
-       else
+       } else {
                res = NULL;
-       mono_monitor_exit (this->synch_lock);
+       }
+
+       LeaveCriticalSection (this->synch_cs);
 
        return res;
 }
@@ -755,79 +1247,74 @@ ves_icall_System_Threading_Thread_GetSerializedCurrentUICulture (MonoThread *thi
+/*
+ * Hands @culture to cache_culture () for @this, using the cache slots
+ * that start at UICULTURES_START_IDX.
+ */
 void
 ves_icall_System_Threading_Thread_SetCachedCurrentUICulture (MonoThread *this, MonoObject *culture)
 {
-       int i;
-       MonoDomain *domain = mono_domain_get ();
-
-       mono_monitor_enter (this->synch_lock);
-       if (!this->ui_culture_info) {
-               this->ui_culture_info = mono_gc_alloc_fixed (sizeof (MonoObject*) * NUM_CACHED_CULTURES, NULL);
-       }
-
-       for (i = 0; i < NUM_CACHED_CULTURES; ++i) {
-               if (this->ui_culture_info [i]) {
-                       if (this->ui_culture_info [i]->vtable->domain == domain)
-                               /* Replace */
-                               break;
-               }
-               else
-                       /* Free entry */
-                       break;
-       }
-       if (i < NUM_CACHED_CULTURES)
-               this->ui_culture_info [i] = culture;
-       mono_monitor_exit (this->synch_lock);
+       cache_culture (this, culture, UICULTURES_START_IDX);
 }
 
+/*
+ * ves_icall_System_Threading_Thread_SetSerializedCurrentUICulture:
+ * @this: thread object to update
+ * @arr: byte array with the serialized UI culture
+ *
+ * Frees any previous this->serialized_ui_culture_info buffer and replaces
+ * it with a g_new0 () copy of @arr's bytes, recording the length in
+ * serialized_ui_culture_info_len.  Runs while holding this->synch_cs.
+ * NOTE(review): @arr is used without a NULL check here.
+ */
 void
 ves_icall_System_Threading_Thread_SetSerializedCurrentUICulture (MonoThread *this, MonoArray *arr)
 {
-       mono_monitor_enter (this->synch_lock);
+       ensure_synch_cs_set (this);
+       
+       EnterCriticalSection (this->synch_cs);
+       
        if (this->serialized_ui_culture_info)
                g_free (this->serialized_ui_culture_info);
        this->serialized_ui_culture_info = g_new0 (guint8, mono_array_length (arr));
        this->serialized_ui_culture_info_len = mono_array_length (arr);
        memcpy (this->serialized_ui_culture_info, mono_array_addr (arr, guint8, 0), mono_array_length (arr));
-       mono_monitor_exit (this->synch_lock);
+
+       LeaveCriticalSection (this->synch_cs);
 }
 
 /* the jit may read the compiled code of this function */
+/* Returns whatever GET_CURRENT_OBJECT () yields; the attach, detach and
+ * exit paths in this file set that value with SET_CURRENT_OBJECT ().
+ */
 MonoThread *
 mono_thread_current (void)
 {
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": returning %p", GET_CURRENT_OBJECT ()));
+       THREAD_DEBUG (g_message ("%s: returning %p", __func__, GET_CURRENT_OBJECT ()));
        return GET_CURRENT_OBJECT ();
 }
 
+/*
+ * ves_icall_System_Threading_Thread_Join_internal:
+ * @this: thread object being joined
+ * @ms: timeout in milliseconds; -1 is mapped to INFINITE
+ * @thread: OS handle to wait on
+ *
+ * Checks the calling thread for a pending interrupt, then raises the
+ * exception built by mono_get_exception_thread_state () if @this still has
+ * ThreadState_Unstarted set (tested under this->synch_cs).  Otherwise
+ * waits on @thread with the calling thread flagged as
+ * ThreadState_WaitSleepJoin for the duration of the wait.
+ *
+ * Returns TRUE when the wait returns WAIT_OBJECT_0, FALSE for any other
+ * wait result.
+ */
 gboolean ves_icall_System_Threading_Thread_Join_internal(MonoThread *this,
                                                         int ms, HANDLE thread)
 {
+       MonoThread *cur_thread = mono_thread_current ();
        gboolean ret;
        
        MONO_ARCH_SAVE_REGS;
+       
+       mono_thread_current_check_pending_interrupt ();
+
+       ensure_synch_cs_set (this);
+       
+       EnterCriticalSection (this->synch_cs);
+       
+       if ((this->state & ThreadState_Unstarted) != 0) {
+               LeaveCriticalSection (this->synch_cs);
+               
+               mono_raise_exception (mono_get_exception_thread_state ("Thread has not been started."));
+               return FALSE;
+       }
 
-       mono_monitor_enter (this->synch_lock);
-       this->state |= ThreadState_WaitSleepJoin;
-       mono_monitor_exit (this->synch_lock);
+       LeaveCriticalSection (this->synch_cs);
 
        if(ms== -1) {
                ms=INFINITE;
        }
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": joining thread handle %p, %d ms",
-                  thread, ms));
+       THREAD_DEBUG (g_message ("%s: joining thread handle %p, %d ms", __func__, thread, ms));
        
+       /* the wait state is set on the calling thread (cur_thread), not on @this */
+       mono_thread_set_state (cur_thread, ThreadState_WaitSleepJoin);
+
        ret=WaitForSingleObjectEx (thread, ms, TRUE);
 
-       mono_monitor_enter (this->synch_lock);
-       this->state &= ~ThreadState_WaitSleepJoin;
-       mono_monitor_exit (this->synch_lock);
+       mono_thread_clr_state (cur_thread, ThreadState_WaitSleepJoin);
        
        if(ret==WAIT_OBJECT_0) {
-               THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": join successful"));
+               THREAD_DEBUG (g_message ("%s: join successful", __func__));
 
                return(TRUE);
        }
        
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": join failed"));
+       THREAD_DEBUG (g_message ("%s: join failed", __func__));
 
        return(FALSE);
 }
@@ -840,44 +1327,35 @@ gboolean ves_icall_System_Threading_WaitHandle_WaitAll_internal(MonoArray *mono_
        guint32 ret;
        guint32 i;
        MonoObject *waitHandle;
-       MonoClass *klass;
        MonoThread *thread = mono_thread_current ();
                
        MONO_ARCH_SAVE_REGS;
 
+       /* Do this WaitSleepJoin check before creating objects */
+       mono_thread_current_check_pending_interrupt ();
+
        numhandles = mono_array_length(mono_handles);
        handles = g_new0(HANDLE, numhandles);
 
-       if (wait_handle_os_handle_field == 0) {
-               /* Get the field os_handle which will contain the actual handle */
-               klass = mono_class_from_name(mono_defaults.corlib, "System.Threading", "WaitHandle");   
-               wait_handle_os_handle_field = mono_class_get_field_from_name(klass, "os_handle");
-       }
-               
        for(i = 0; i < numhandles; i++) {       
-               waitHandle = mono_array_get(mono_handles, MonoObject*, i);              
-               mono_field_get_value(waitHandle, wait_handle_os_handle_field, &handles[i]);
+               waitHandle = mono_array_get(mono_handles, MonoObject*, i);
+               handles [i] = mono_wait_handle_get_handle ((MonoWaitHandle *) waitHandle);
        }
        
        if(ms== -1) {
                ms=INFINITE;
        }
 
-       mono_monitor_enter (thread->synch_lock);
-       thread->state |= ThreadState_WaitSleepJoin;
-       mono_monitor_exit (thread->synch_lock);
+       mono_thread_set_state (thread, ThreadState_WaitSleepJoin);
        
        ret=WaitForMultipleObjectsEx(numhandles, handles, TRUE, ms, TRUE);
 
-       mono_monitor_enter (thread->synch_lock);
-       thread->state &= ~ThreadState_WaitSleepJoin;
-       mono_monitor_exit (thread->synch_lock);
+       mono_thread_clr_state (thread, ThreadState_WaitSleepJoin);
 
        g_free(handles);
 
        if(ret==WAIT_FAILED) {
-               THREAD_WAIT_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": (%d) Wait failed",
-                         GetCurrentThreadId ()));
+               THREAD_WAIT_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") Wait failed", __func__, GetCurrentThreadId ()));
                return(FALSE);
        } else if(ret==WAIT_TIMEOUT || ret == WAIT_IO_COMPLETION) {
                /* Do we want to try again if we get
@@ -885,8 +1363,7 @@ gboolean ves_icall_System_Threading_WaitHandle_WaitAll_internal(MonoArray *mono_
                 * WaitHandle doesn't give any clues.  (We'd have to
                 * fiddle with the timeout if we retry.)
                 */
-               THREAD_WAIT_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": (%d) Wait timed out",
-                         GetCurrentThreadId ()));
+               THREAD_WAIT_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") Wait timed out", __func__, GetCurrentThreadId ()));
                return(FALSE);
        }
        
@@ -901,43 +1378,34 @@ gint32 ves_icall_System_Threading_WaitHandle_WaitAny_internal(MonoArray *mono_ha
        guint32 ret;
        guint32 i;
        MonoObject *waitHandle;
-       MonoClass *klass;
        MonoThread *thread = mono_thread_current ();
                
        MONO_ARCH_SAVE_REGS;
 
+       /* Do this WaitSleepJoin check before creating objects */
+       mono_thread_current_check_pending_interrupt ();
+
        numhandles = mono_array_length(mono_handles);
        handles = g_new0(HANDLE, numhandles);
 
-       if (wait_handle_os_handle_field == 0) {
-               /* Get the field os_handle which will contain the actual handle */
-               klass = mono_class_from_name(mono_defaults.corlib, "System.Threading", "WaitHandle");   
-               wait_handle_os_handle_field = mono_class_get_field_from_name(klass, "os_handle");
-       }
-               
        for(i = 0; i < numhandles; i++) {       
-               waitHandle = mono_array_get(mono_handles, MonoObject*, i);              
-               mono_field_get_value(waitHandle, wait_handle_os_handle_field, &handles[i]);
+               waitHandle = mono_array_get(mono_handles, MonoObject*, i);
+               handles [i] = mono_wait_handle_get_handle ((MonoWaitHandle *) waitHandle);
        }
        
        if(ms== -1) {
                ms=INFINITE;
        }
 
-       mono_monitor_enter (thread->synch_lock);
-       thread->state |= ThreadState_WaitSleepJoin;
-       mono_monitor_exit (thread->synch_lock);
-
+       mono_thread_set_state (thread, ThreadState_WaitSleepJoin);
+       
        ret=WaitForMultipleObjectsEx(numhandles, handles, FALSE, ms, TRUE);
 
-       mono_monitor_enter (thread->synch_lock);
-       thread->state &= ~ThreadState_WaitSleepJoin;
-       mono_monitor_exit (thread->synch_lock);
-
+       mono_thread_clr_state (thread, ThreadState_WaitSleepJoin);
+       
        g_free(handles);
 
-       THREAD_WAIT_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": (%d) returning %d",
-                 GetCurrentThreadId (), ret));
+       THREAD_WAIT_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") returning %d", __func__, GetCurrentThreadId (), ret));
 
        /*
-        * These need to be here.  See MSDN dos on WaitForMultipleObjects.
+        * These need to be here.  See MSDN docs on WaitForMultipleObjects.
@@ -961,73 +1429,151 @@ gboolean ves_icall_System_Threading_WaitHandle_WaitOne_internal(MonoObject *this
        
        MONO_ARCH_SAVE_REGS;
 
-       THREAD_WAIT_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": (%d) waiting for %p, %d ms",
-                 GetCurrentThreadId (), handle, ms));
+       THREAD_WAIT_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") waiting for %p, %d ms", __func__, GetCurrentThreadId (), handle, ms));
        
        if(ms== -1) {
                ms=INFINITE;
        }
        
-       mono_monitor_enter (thread->synch_lock);
-       thread->state |= ThreadState_WaitSleepJoin;
-       mono_monitor_exit (thread->synch_lock);
-
-       ret=WaitForSingleObjectEx (handle, ms, TRUE);
+       mono_thread_current_check_pending_interrupt ();
+
+       mono_thread_set_state (thread, ThreadState_WaitSleepJoin);
+       
+       ret=WaitForSingleObjectEx (handle, ms, TRUE);
+       
+       mono_thread_clr_state (thread, ThreadState_WaitSleepJoin);
+       
+       if(ret==WAIT_FAILED) {
+               THREAD_WAIT_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") Wait failed", __func__, GetCurrentThreadId ()));
+               return(FALSE);
+       } else if(ret==WAIT_TIMEOUT || ret == WAIT_IO_COMPLETION) {
+               /* Do we want to try again if we get
+                * WAIT_IO_COMPLETION? The documentation for
+                * WaitHandle doesn't give any clues.  (We'd have to
+                * fiddle with the timeout if we retry.)
+                */
+               THREAD_WAIT_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") Wait timed out", __func__, GetCurrentThreadId ()));
+               return(FALSE);
+       }
+       
+       return(TRUE);
+}
+
+/*
+ * ves_icall_System_Threading_Mutex_CreateMutex_internal:
+ * @owned: passed to CreateMutex () as the initial-owner flag
+ * @name: mutex name, or NULL for an unnamed mutex
+ * @created: out parameter, see below
+ *
+ * Returns the handle produced by CreateMutex ().  *created is set to FALSE
+ * only when @name is non-NULL and GetLastError () reports
+ * ERROR_ALREADY_EXISTS after the call; it is TRUE in every other case.
+ * The returned handle itself is not checked here.
+ */
+HANDLE ves_icall_System_Threading_Mutex_CreateMutex_internal (MonoBoolean owned, MonoString *name, MonoBoolean *created)
+{ 
+       HANDLE mutex;
+       
+       MONO_ARCH_SAVE_REGS;
+   
+       *created = TRUE;
+       
+       if (name == NULL) {
+               mutex = CreateMutex (NULL, owned, NULL);
+       } else {
+               mutex = CreateMutex (NULL, owned, mono_string_chars (name));
+               
+               if (GetLastError () == ERROR_ALREADY_EXISTS) {
+                       *created = FALSE;
+               }
+       }
+
+       return(mutex);
+}                                                                   
+
+/* Releases @handle with ReleaseMutex () and returns that call's result. */
+MonoBoolean ves_icall_System_Threading_Mutex_ReleaseMutex_internal (HANDLE handle ) { 
+       MONO_ARCH_SAVE_REGS;
+
+       return(ReleaseMutex (handle));
+}
+
+/*
+ * ves_icall_System_Threading_Mutex_OpenMutex_internal:
+ * @name: name of the mutex to open (used without a NULL check)
+ * @rights: access rights passed straight to OpenMutex ()
+ * @error: out parameter; ERROR_SUCCESS, or GetLastError () when the open
+ * returned NULL
+ *
+ * Returns the handle produced by OpenMutex (), which may be NULL.
+ */
+HANDLE ves_icall_System_Threading_Mutex_OpenMutex_internal (MonoString *name,
+                                                           gint32 rights,
+                                                           gint32 *error)
+{
+       HANDLE ret;
+       
+       MONO_ARCH_SAVE_REGS;
+       
+       *error = ERROR_SUCCESS;
        
-       mono_monitor_enter (thread->synch_lock);
-       thread->state &= ~ThreadState_WaitSleepJoin;
-       mono_monitor_exit (thread->synch_lock);
-
-       if(ret==WAIT_FAILED) {
-               THREAD_WAIT_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": (%d) Wait failed",
-                         GetCurrentThreadId ()));
-               return(FALSE);
-       } else if(ret==WAIT_TIMEOUT || ret == WAIT_IO_COMPLETION) {
-               /* Do we want to try again if we get
-                * WAIT_IO_COMPLETION? The documentation for
-                * WaitHandle doesn't give any clues.  (We'd have to
-                * fiddle with the timeout if we retry.)
-                */
-               THREAD_WAIT_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": (%d) Wait timed out",
-                         GetCurrentThreadId ()));
-               return(FALSE);
+       ret = OpenMutex (rights, FALSE, mono_string_chars (name));
+       if (ret == NULL) {
+               *error = GetLastError ();
        }
        
-       return(TRUE);
+       return(ret);
 }
 
-HANDLE ves_icall_System_Threading_Mutex_CreateMutex_internal (MonoBoolean owned, MonoString *name, MonoBoolean *created)
+
+/*
+ * ves_icall_System_Threading_Semaphore_CreateSemaphore_internal:
+ * @initialCount: initial count passed to CreateSemaphore ()
+ * @maximumCount: maximum count passed to CreateSemaphore ()
+ * @name: semaphore name, or NULL for an unnamed semaphore
+ * @created: out parameter, see below
+ *
+ * Returns the handle produced by CreateSemaphore ().  *created is set to
+ * FALSE only when @name is non-NULL and GetLastError () reports
+ * ERROR_ALREADY_EXISTS after the call; it is TRUE in every other case.
+ * The returned handle itself is not checked here.
+ */
+HANDLE ves_icall_System_Threading_Semaphore_CreateSemaphore_internal (gint32 initialCount, gint32 maximumCount, MonoString *name, MonoBoolean *created)
 { 
-       HANDLE mutex;
+       HANDLE sem;
        
        MONO_ARCH_SAVE_REGS;
    
        *created = TRUE;
        
        if (name == NULL) {
-               mutex = CreateMutex (NULL, owned, NULL);
+               sem = CreateSemaphore (NULL, initialCount, maximumCount, NULL);
        } else {
-               mutex = CreateMutex (NULL, owned, mono_string_chars (name));
+               sem = CreateSemaphore (NULL, initialCount, maximumCount,
+                                      mono_string_chars (name));
                
                if (GetLastError () == ERROR_ALREADY_EXISTS) {
                        *created = FALSE;
                }
        }
 
-       return(mutex);
+       return(sem);
 }                                                                   
 
-void ves_icall_System_Threading_Mutex_ReleaseMutex_internal (HANDLE handle ) { 
+/*
+ * ves_icall_System_Threading_Semaphore_ReleaseSemaphore_internal:
+ * @handle: semaphore handle
+ * @releaseCount: amount passed to ReleaseSemaphore ()
+ * @fail: out parameter; set to TRUE when ReleaseSemaphore () reports
+ * failure, FALSE otherwise
+ *
+ * Returns the previous count reported by ReleaseSemaphore ().  When the
+ * call fails the previous count is not reliably filled in, so 0 is
+ * returned instead of an indeterminate value.
+ */
+gint32 ves_icall_System_Threading_Semaphore_ReleaseSemaphore_internal (HANDLE handle, gint32 releaseCount, MonoBoolean *fail)
+{ 
+       /* defined value for the failure path, where it may not be written */
+       gint32 prevcount = 0;
+       
        MONO_ARCH_SAVE_REGS;
 
-       ReleaseMutex(handle);
+       *fail = !ReleaseSemaphore (handle, releaseCount, &prevcount);
+
+       return (prevcount);
+}
+
+/*
+ * ves_icall_System_Threading_Semaphore_OpenSemaphore_internal:
+ * @name: name of the semaphore to open (used without a NULL check)
+ * @rights: access rights passed straight to OpenSemaphore ()
+ * @error: out parameter; ERROR_SUCCESS, or GetLastError () when the open
+ * returned NULL
+ *
+ * Returns the handle produced by OpenSemaphore (), which may be NULL.
+ */
+HANDLE ves_icall_System_Threading_Semaphore_OpenSemaphore_internal (MonoString *name, gint32 rights, gint32 *error)
+{
+       HANDLE ret;
+       
+       MONO_ARCH_SAVE_REGS;
+       
+       *error = ERROR_SUCCESS;
+       
+       ret = OpenSemaphore (rights, FALSE, mono_string_chars (name));
+       if (ret == NULL) {
+               *error = GetLastError ();
+       }
+       
+       return(ret);
 }
 
-HANDLE ves_icall_System_Threading_Events_CreateEvent_internal (MonoBoolean manual, MonoBoolean initial, MonoString *name) {
+/*
+ * ves_icall_System_Threading_Events_CreateEvent_internal:
+ * @manual: passed to CreateEvent () as the manual-reset flag
+ * @initial: passed to CreateEvent () as the initial state
+ * @name: event name, or NULL for an unnamed event
+ * @created: out parameter, see below
+ *
+ * Returns the handle produced by CreateEvent ().  *created is set to FALSE
+ * only when @name is non-NULL and GetLastError () reports
+ * ERROR_ALREADY_EXISTS after the call; it is TRUE in every other case.
+ * The returned handle itself is not checked here.
+ */
+HANDLE ves_icall_System_Threading_Events_CreateEvent_internal (MonoBoolean manual, MonoBoolean initial, MonoString *name, MonoBoolean *created)
+{
+       HANDLE event;
+       
        MONO_ARCH_SAVE_REGS;
 
-       return(CreateEvent (NULL, manual, initial,
-                           name==NULL?NULL:mono_string_chars (name)));
+       *created = TRUE;
+
+       if (name == NULL) {
+               event = CreateEvent (NULL, manual, initial, NULL);
+       } else {
+               event = CreateEvent (NULL, manual, initial,
+                                    mono_string_chars (name));
+               
+               if (GetLastError () == ERROR_ALREADY_EXISTS) {
+                       *created = FALSE;
+               }
+       }
+       
+       return(event);
 }
 
 gboolean ves_icall_System_Threading_Events_SetEvent_internal (HANDLE handle) {
@@ -1049,6 +1595,24 @@ ves_icall_System_Threading_Events_CloseEvent_internal (HANDLE handle) {
        CloseHandle (handle);
 }
 
+/*
+ * Opens an event by name through OpenEvent (), forwarding RIGHTS and
+ * passing FALSE as the second argument.
+ *
+ * *ERROR is first set to ERROR_SUCCESS and is overwritten with
+ * GetLastError () only when OpenEvent () returns NULL.  The value
+ * returned by OpenEvent () is handed back unchanged, so the result is
+ * NULL on failure.
+ *
+ * NOTE(review): NAME goes to mono_string_chars () with no NULL check in
+ * this function; presumably the caller guarantees it is non-NULL --
+ * confirm.
+ */
+HANDLE ves_icall_System_Threading_Events_OpenEvent_internal (MonoString *name,
+                                                            gint32 rights,
+                                                            gint32 *error)
+{
+       HANDLE ret;
+       
+       MONO_ARCH_SAVE_REGS;
+       
+       *error = ERROR_SUCCESS;
+       
+       ret = OpenEvent (rights, FALSE, mono_string_chars (name));
+       if (ret == NULL) {
+               *error = GetLastError ();
+       }
+       
+       return(ret);
+}
+
 gint32 ves_icall_System_Threading_Interlocked_Increment_Int (gint32 *location)
 {
        MONO_ARCH_SAVE_REGS;
@@ -1062,11 +1626,11 @@ gint64 ves_icall_System_Threading_Interlocked_Increment_Long (gint64 *location)
 
        MONO_ARCH_SAVE_REGS;
 
-       EnterCriticalSection(&interlocked_mutex);
+       mono_interlocked_lock ();
 
        ret = ++ *location;
        
-       LeaveCriticalSection(&interlocked_mutex);
+       mono_interlocked_unlock ();
 
        
        return ret;
@@ -1085,11 +1649,11 @@ gint64 ves_icall_System_Threading_Interlocked_Decrement_Long (gint64 * location)
 
        MONO_ARCH_SAVE_REGS;
 
-       EnterCriticalSection(&interlocked_mutex);
+       mono_interlocked_lock ();
 
        ret = -- *location;
        
-       LeaveCriticalSection(&interlocked_mutex);
+       mono_interlocked_unlock ();
 
        return ret;
 }
@@ -1132,10 +1696,10 @@ ves_icall_System_Threading_Interlocked_Exchange_Long (gint64 *location, gint64 v
         * According to MSDN, this function is only atomic with regards to the 
         * other Interlocked functions on 32 bit platforms.
         */
-       EnterCriticalSection(&interlocked_mutex);
+       mono_interlocked_lock ();
        res = *location;
        *location = value;
-       LeaveCriticalSection(&interlocked_mutex);
+       mono_interlocked_unlock ();
 
        return res;
 #endif
@@ -1158,10 +1722,10 @@ ves_icall_System_Threading_Interlocked_Exchange_Double (gdouble *location, gdoub
         * According to MSDN, this function is only atomic with regards to the 
         * other Interlocked functions on 32 bit platforms.
         */
-       EnterCriticalSection(&interlocked_mutex);
+       mono_interlocked_lock ();
        res = *location;
        *location = value;
-       LeaveCriticalSection(&interlocked_mutex);
+       mono_interlocked_unlock ();
 
        return res;
 #endif
@@ -1208,11 +1772,11 @@ ves_icall_System_Threading_Interlocked_CompareExchange_Double (gdouble *location
 #else
        gdouble old;
 
-       EnterCriticalSection(&interlocked_mutex);
+       mono_interlocked_lock ();
        old = *location;
        if (old == comparand)
                *location = value;
-       LeaveCriticalSection(&interlocked_mutex);
+       mono_interlocked_unlock ();
 
        return old;
 #endif
@@ -1226,16 +1790,32 @@ ves_icall_System_Threading_Interlocked_CompareExchange_Long (gint64 *location, g
 #else
        gint64 old;
 
-       EnterCriticalSection(&interlocked_mutex);
+       mono_interlocked_lock ();
        old = *location;
        if (old == comparand)
                *location = value;
-       LeaveCriticalSection(&interlocked_mutex);
+       mono_interlocked_unlock ();
        
        return old;
 #endif
 }
 
+/*
+ * Object-reference flavour of Interlocked.CompareExchange: forwards to
+ * InterlockedCompareExchangePointer () with LOCATION cast to gpointer *,
+ * VALUE as the exchange value and COMPARAND as the comparand, and returns
+ * whatever that call returns.
+ *
+ * NOTE(review): the reference is stored without going through a
+ * MONO_OBJECT_SETREF-style macro; confirm no write barrier is needed here.
+ */
+MonoObject*
+ves_icall_System_Threading_Interlocked_CompareExchange_T (MonoObject **location, MonoObject *value, MonoObject *comparand)
+{
+       MONO_ARCH_SAVE_REGS;
+
+       return InterlockedCompareExchangePointer ((gpointer *)location, value, comparand);
+}
+
+/*
+ * Object-reference flavour of Interlocked.Exchange: forwards to
+ * InterlockedExchangePointer () with LOCATION cast to gpointer * and
+ * VALUE as the new value, and returns whatever that call returns.
+ *
+ * NOTE(review): the reference is stored without going through a
+ * MONO_OBJECT_SETREF-style macro; confirm no write barrier is needed here.
+ */
+MonoObject*
+ves_icall_System_Threading_Interlocked_Exchange_T (MonoObject **location, MonoObject *value)
+{
+       MONO_ARCH_SAVE_REGS;
+
+       return InterlockedExchangePointer ((gpointer *)location, value);
+}
+
 gint32 
 ves_icall_System_Threading_Interlocked_Add_Int (gint32 *location, gint32 value)
 {
@@ -1246,12 +1826,12 @@ ves_icall_System_Threading_Interlocked_Add_Int (gint32 *location, gint32 value)
 #else
        gint32 orig;
 
-       EnterCriticalSection(&interlocked_mutex);
+       mono_interlocked_lock ();
        orig = *location;
        *location = orig + value;
-       LeaveCriticalSection(&interlocked_mutex);
+       mono_interlocked_unlock ();
 
-       return orig;
+       return orig + value;
 #endif
 }
 
@@ -1265,12 +1845,12 @@ ves_icall_System_Threading_Interlocked_Add_Long (gint64 *location, gint64 value)
 #else
        gint64 orig;
 
-       EnterCriticalSection(&interlocked_mutex);
+       mono_interlocked_lock ();
        orig = *location;
        *location = orig + value;
-       LeaveCriticalSection(&interlocked_mutex);
+       mono_interlocked_unlock ();
 
-       return orig;
+       return orig + value;
 #endif
 }
 
@@ -1283,35 +1863,147 @@ ves_icall_System_Threading_Interlocked_Read_Long (gint64 *location)
 #else
        gint64 res;
 
-       EnterCriticalSection(&interlocked_mutex);
+       mono_interlocked_lock ();
        res = *location;
-       LeaveCriticalSection(&interlocked_mutex);
+       mono_interlocked_unlock ();
 
        return res;
 #endif
 }
 
+/*
+ * Acquires the threads lock with mono_threads_lock () and releases it
+ * straight away with mono_threads_unlock (); no work is done while the
+ * lock is held.
+ *
+ * NOTE(review): presumably the lock/unlock pair is used only for its
+ * memory-ordering side effect -- confirm.  This file includes
+ * mono-membar.h; check whether it offers a primitive that avoids
+ * contending on the threads lock.
+ */
+void
+ves_icall_System_Threading_Thread_MemoryBarrier (void)
+{
+       mono_threads_lock ();
+       mono_threads_unlock ();
+}
+
+/*
+ * Passes THIS and STATE to mono_thread_clr_state (), then signals
+ * background_change_event when STATE contains the
+ * ThreadState_Background bit.
+ */
+void
+ves_icall_System_Threading_Thread_ClrState (MonoThread* this, guint32 state)
+{
+       mono_thread_clr_state (this, state);
+
+       if (state & ThreadState_Background) {
+               /* If the thread changes the background mode, the main thread has to
+                * be notified, since it has to rebuild the list of threads to
+                * wait for.
+                */
+               SetEvent (background_change_event);
+       }
+}
+
+/*
+ * Passes THIS and STATE to mono_thread_set_state (), then signals
+ * background_change_event when STATE contains the
+ * ThreadState_Background bit.
+ */
+void
+ves_icall_System_Threading_Thread_SetState (MonoThread* this, guint32 state)
+{
+       mono_thread_set_state (this, state);
+       
+       if (state & ThreadState_Background) {
+               /* If the thread changes the background mode, the main thread has to
+                * be notified, since it has to rebuild the list of threads to
+                * wait for.
+                */
+               SetEvent (background_change_event);
+       }
+}
+
+/*
+ * Returns a snapshot of THIS->state, read while holding THIS->synch_cs.
+ *
+ * ensure_synch_cs_set () is called first; presumably it makes sure
+ * THIS->synch_cs exists before it is entered -- confirm against its
+ * definition.
+ */
+guint32
+ves_icall_System_Threading_Thread_GetState (MonoThread* this)
+{
+       guint32 state;
+
+       ensure_synch_cs_set (this);
+       
+       EnterCriticalSection (this->synch_cs);
+       
+       state = this->state;
+
+       LeaveCriticalSection (this->synch_cs);
+       
+       return state;
+}
+
+/*
+ * Requests an interrupt of THIS.
+ *
+ * Nothing further happens when THIS is the current thread.  Otherwise,
+ * under THIS->synch_cs, thread_interrupt_requested is set to TRUE and
+ * the state is checked for ThreadState_WaitSleepJoin.  If that bit was
+ * set, signal_thread_state_change () is called on THIS once the
+ * critical section has been left.
+ */
+void ves_icall_System_Threading_Thread_Interrupt_internal (MonoThread *this)
+{
+       gboolean throw = FALSE;
+       
+       ensure_synch_cs_set (this);
+
+       if (this == mono_thread_current ())
+               return;
+       
+       EnterCriticalSection (this->synch_cs);
+       
+       this->thread_interrupt_requested = TRUE;
+       
+       if (this->state & ThreadState_WaitSleepJoin) {
+               throw = TRUE;
+       }
+       
+       LeaveCriticalSection (this->synch_cs);
+       
+       if (throw) {
+               signal_thread_state_change (this);
+       }
+}
+
+/*
+ * Consumes a pending interrupt request on the current thread.
+ *
+ * Calls mono_debugger_check_interruption () first.  Then, under the
+ * current thread's synch_cs, tests thread_interrupt_requested and
+ * resets it to FALSE if it was set.  When a request was found, the
+ * exception from mono_get_exception_thread_interrupted () is raised
+ * with mono_raise_exception () after the critical section has been
+ * left.
+ */
+void mono_thread_current_check_pending_interrupt ()
+{
+       MonoThread *thread = mono_thread_current ();
+       gboolean throw = FALSE;
+
+       mono_debugger_check_interruption ();
+
+       ensure_synch_cs_set (thread);
+       
+       EnterCriticalSection (thread->synch_cs);
+       
+       if (thread->thread_interrupt_requested) {
+               throw = TRUE;
+               thread->thread_interrupt_requested = FALSE;
+       }
+       
+       LeaveCriticalSection (thread->synch_cs);
+
+       if (throw) {
+               mono_raise_exception (mono_get_exception_thread_interrupted ());
+       }
+}
+
 int  
 mono_thread_get_abort_signal (void)
 {
-#if defined (__MINGW32__) || defined (_MSC_VER)
+#ifdef PLATFORM_WIN32
        return -1;
 #else
 #ifndef        SIGRTMIN
        return SIGUSR1;
 #else
+       static int abort_signum = -1;
+       int i;
+       if (abort_signum != -1)
+               return abort_signum;
+       /* we try to avoid SIGRTMIN and any one that might have been set already, see bug #75387 */
+       for (i = SIGRTMIN + 1; i < SIGRTMAX; ++i) {
+               struct sigaction sinfo;
+               sigaction (i, NULL, &sinfo);
+               if (sinfo.sa_handler == SIG_DFL && (void*)sinfo.sa_sigaction == (void*)SIG_DFL) {
+                       abort_signum = i;
+                       return i;
+               }
+       }
+       /* fallback to the old way */
        return SIGRTMIN;
 #endif
-#endif /*defined (__MINGW32__) || defined (_MSC_VER) */
+#endif /* PLATFORM_WIN32 */
 }
 
-#if defined (__MINGW32__) || defined (_MSC_VER)
+#ifdef PLATFORM_WIN32
 static void CALLBACK interruption_request_apc (ULONG_PTR param)
 {
        MonoException* exc = mono_thread_request_interruption (FALSE);
        if (exc) mono_raise_exception (exc);
 }
-#endif /* defined (__MINGW32__) || defined (_MSC_VER) */
+#endif /* PLATFORM_WIN32 */
 
 /*
  * signal_thread_state_change
@@ -1328,16 +2020,16 @@ static void signal_thread_state_change (MonoThread *thread)
                        mono_raise_exception (exc);
        }
 
-#if defined (__MINGW32__) || defined (_MSC_VER)
+#ifdef PLATFORM_WIN32
        QueueUserAPC ((PAPCFUNC)interruption_request_apc, thread->handle, NULL);
 #else
        /* fixme: store the state somewhere */
 #ifdef PTHREAD_POINTER_ID
-       pthread_kill (GUINT_TO_POINTER(thread->tid), mono_thread_get_abort_signal ());
+       pthread_kill ((gpointer)(gsize)(thread->tid), mono_thread_get_abort_signal ());
 #else
        pthread_kill (thread->tid, mono_thread_get_abort_signal ());
 #endif
-#endif /* defined (__MINGW32__) || defined (__MSC_VER) */
+#endif /* PLATFORM_WIN32 */
 }
 
 void
@@ -1345,27 +2037,36 @@ ves_icall_System_Threading_Thread_Abort (MonoThread *thread, MonoObject *state)
 {
        MONO_ARCH_SAVE_REGS;
 
-       mono_monitor_enter (thread->synch_lock);
-
+       ensure_synch_cs_set (thread);
+       
+       EnterCriticalSection (thread->synch_cs);
+       
        if ((thread->state & ThreadState_AbortRequested) != 0 || 
-               (thread->state & ThreadState_StopRequested) != 0) 
+               (thread->state & ThreadState_StopRequested) != 0 ||
+               (thread->state & ThreadState_Stopped) != 0)
        {
-               mono_monitor_exit (thread->synch_lock);
+               LeaveCriticalSection (thread->synch_cs);
+               return;
+       }
+
+       if ((thread->state & ThreadState_Unstarted) != 0) {
+               thread->state |= ThreadState_Aborted;
+               LeaveCriticalSection (thread->synch_cs);
                return;
        }
 
        thread->state |= ThreadState_AbortRequested;
-       thread->abort_state = state;
+       MONO_OBJECT_SETREF (thread, abort_state, state);
        thread->abort_exc = NULL;
 
-       mono_monitor_exit (thread->synch_lock);
+       LeaveCriticalSection (thread->synch_cs);
 
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION
-                  ": (%d) Abort requested for %p (%d)", GetCurrentThreadId (),
-                  thread, thread->tid));
-       
-       /* Make sure the thread is awake */
-       ves_icall_System_Threading_Thread_Resume (thread);
+       THREAD_DEBUG (g_message ("%s: (%"G_GSIZE_FORMAT") Abort requested for %p (%"G_GSIZE_FORMAT")", __func__, GetCurrentThreadId (), thread, (gsize)thread->tid));
+
+       /* During shutdown, we can't wait for other threads */
+       if (!shutting_down)
+               /* Make sure the thread is awake */
+               mono_thread_resume (thread);
        
        signal_thread_state_change (thread);
 }
@@ -1376,74 +2077,113 @@ ves_icall_System_Threading_Thread_ResetAbort (void)
        MonoThread *thread = mono_thread_current ();
 
        MONO_ARCH_SAVE_REGS;
+
+       ensure_synch_cs_set (thread);
        
-       mono_monitor_enter (thread->synch_lock);
-       
+       EnterCriticalSection (thread->synch_cs);
+
        thread->state &= ~ThreadState_AbortRequested;
        
        if (!thread->abort_exc) {
                const char *msg = "Unable to reset abort because no abort was requested";
-               mono_monitor_exit (thread->synch_lock);
+               LeaveCriticalSection (thread->synch_cs);
                mono_raise_exception (mono_get_exception_thread_state (msg));
        } else {
                thread->abort_exc = NULL;
                thread->abort_state = NULL;
        }
        
-       mono_monitor_exit (thread->synch_lock);
+       LeaveCriticalSection (thread->synch_cs);
 }
 
-void
-ves_icall_System_Threading_Thread_Suspend (MonoThread *thread)
+static gboolean
+mono_thread_suspend (MonoThread *thread)
 {
        MONO_ARCH_SAVE_REGS;
 
-       mono_monitor_enter (thread->synch_lock);
+       ensure_synch_cs_set (thread);
+       
+       EnterCriticalSection (thread->synch_cs);
+
+       if ((thread->state & ThreadState_Unstarted) != 0 || 
+               (thread->state & ThreadState_Aborted) != 0 || 
+               (thread->state & ThreadState_Stopped) != 0)
+       {
+               LeaveCriticalSection (thread->synch_cs);
+               return FALSE;
+       }
 
        if ((thread->state & ThreadState_Suspended) != 0 || 
                (thread->state & ThreadState_SuspendRequested) != 0 ||
                (thread->state & ThreadState_StopRequested) != 0) 
        {
-               mono_monitor_exit (thread->synch_lock);
-               return;
+               LeaveCriticalSection (thread->synch_cs);
+               return TRUE;
        }
        
        thread->state |= ThreadState_SuspendRequested;
-       mono_monitor_exit (thread->synch_lock);
+
+       LeaveCriticalSection (thread->synch_cs);
 
        signal_thread_state_change (thread);
+       return TRUE;
 }
 
 void
-ves_icall_System_Threading_Thread_Resume (MonoThread *thread)
+ves_icall_System_Threading_Thread_Suspend (MonoThread *thread)
+{
+       if (!mono_thread_suspend (thread))
+               mono_raise_exception (mono_get_exception_thread_state ("Thread has not been started, or is dead."));
+}
+
+static gboolean
+mono_thread_resume (MonoThread *thread)
 {
        MONO_ARCH_SAVE_REGS;
 
-       mono_monitor_enter (thread->synch_lock);
+       ensure_synch_cs_set (thread);
+       
+       EnterCriticalSection (thread->synch_cs);
 
        if ((thread->state & ThreadState_SuspendRequested) != 0) {
                thread->state &= ~ThreadState_SuspendRequested;
-               mono_monitor_exit (thread->synch_lock);
-               return;
+               LeaveCriticalSection (thread->synch_cs);
+               return TRUE;
        }
-               
-       if ((thread->state & ThreadState_Suspended) == 0) 
+
+       if ((thread->state & ThreadState_Suspended) == 0 ||
+               (thread->state & ThreadState_Unstarted) != 0 || 
+               (thread->state & ThreadState_Aborted) != 0 || 
+               (thread->state & ThreadState_Stopped) != 0)
        {
-               mono_monitor_exit (thread->synch_lock);
-               return;
+               LeaveCriticalSection (thread->synch_cs);
+               return FALSE;
        }
        
        thread->resume_event = CreateEvent (NULL, TRUE, FALSE, NULL);
+       if (thread->resume_event == NULL) {
+               LeaveCriticalSection (thread->synch_cs);
+               return(FALSE);
+       }
        
        /* Awake the thread */
        SetEvent (thread->suspend_event);
 
-       mono_monitor_exit (thread->synch_lock);
+       LeaveCriticalSection (thread->synch_cs);
 
        /* Wait for the thread to awake */
        WaitForSingleObject (thread->resume_event, INFINITE);
        CloseHandle (thread->resume_event);
        thread->resume_event = NULL;
+
+       return TRUE;
+}
+
+void
+ves_icall_System_Threading_Thread_Resume (MonoThread *thread)
+{
+       if (!mono_thread_resume (thread))
+               mono_raise_exception (mono_get_exception_thread_state ("Thread has not been started, or is dead."));
 }
 
 static gboolean
@@ -1472,22 +2212,24 @@ is_running_protected_wrapper (void)
 
 void mono_thread_stop (MonoThread *thread)
 {
-       mono_monitor_enter (thread->synch_lock);
+       ensure_synch_cs_set (thread);
+       
+       EnterCriticalSection (thread->synch_cs);
 
        if ((thread->state & ThreadState_StopRequested) != 0 ||
                (thread->state & ThreadState_Stopped) != 0)
        {
-               mono_monitor_exit (thread->synch_lock);
+               LeaveCriticalSection (thread->synch_cs);
                return;
        }
        
        /* Make sure the thread is awake */
-       ves_icall_System_Threading_Thread_Resume (thread);
-       
+       mono_thread_resume (thread);
+
        thread->state |= ThreadState_StopRequested;
        thread->state &= ~ThreadState_AbortRequested;
        
-       mono_monitor_exit (thread->synch_lock);
+       LeaveCriticalSection (thread->synch_cs);
        
        signal_thread_state_change (thread);
 }
@@ -1555,20 +2297,27 @@ ves_icall_System_Threading_Thread_VolatileWriteIntPtr (void *ptr, void *value)
 void mono_thread_init (MonoThreadStartCB start_cb,
                       MonoThreadAttachCB attach_cb)
 {
+       MONO_GC_REGISTER_ROOT (small_id_table);
        InitializeCriticalSection(&threads_mutex);
        InitializeCriticalSection(&interlocked_mutex);
        InitializeCriticalSection(&contexts_mutex);
+       InitializeCriticalSection(&delayed_free_table_mutex);
+       InitializeCriticalSection(&small_id_mutex);
+       
+       background_change_event = CreateEvent (NULL, TRUE, FALSE, NULL);
+       g_assert(background_change_event != NULL);
        
        mono_init_static_data_info (&thread_static_info);
        mono_init_static_data_info (&context_static_info);
 
        current_object_key=TlsAlloc();
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": Allocated current_object_key %d",
-                  current_object_key));
+       THREAD_DEBUG (g_message ("%s: Allocated current_object_key %d", __func__, current_object_key));
 
        mono_thread_start_cb = start_cb;
        mono_thread_attach_cb = attach_cb;
 
+       delayed_free_table = g_array_new (FALSE, FALSE, sizeof (DelayedFreeItem));
+
        /* Get a pseudo handle to the current process.  This is just a
         * kludge so that wapi can build a process handle if needed.
         * As a pseudo handle is returned, we don't need to clean
@@ -1577,21 +2326,70 @@ void mono_thread_init (MonoThreadStartCB start_cb,
        GetCurrentProcess ();
 }
 
+/*
+ * Teardown counterpart of mono_thread_init ().
+ *
+ * In order: calls mono_thread_hazardous_try_free_all (); on builds that
+ * define neither PLATFORM_WIN32 nor RUN_IN_SUBTHREAD, calls
+ * _wapi_thread_signal_self () with the value of
+ * mono_environment_exitcode_get (); frees delayed_free_table (segment
+ * included) and resets the pointer to NULL; releases current_object_key
+ * with TlsFree ().
+ *
+ * The "#if 0" block is compiled out, so the critical sections and
+ * background_change_event are not destroyed here.
+ */
+void mono_thread_cleanup (void)
+{
+       mono_thread_hazardous_try_free_all ();
+
+#if !defined(PLATFORM_WIN32) && !defined(RUN_IN_SUBTHREAD)
+       /* The main thread must abandon any held mutexes (particularly
+        * important for named mutexes as they are shared across
+        * processes, see bug 74680.)  This will happen when the
+        * thread exits, but if it's not running in a subthread it
+        * won't exit in time.
+        */
+       /* Using non-w32 API is a nasty kludge, but I couldn't find
+        * anything in the documentation that would let me do this
+        * here yet still be safe to call on windows.
+        */
+       _wapi_thread_signal_self (mono_environment_exitcode_get ());
+#endif
+
+#if 0
+       /* This stuff needs more testing, it seems one of these
+        * critical sections can be locked when mono_thread_cleanup is
+        * called.
+        */
+       DeleteCriticalSection (&threads_mutex);
+       DeleteCriticalSection (&interlocked_mutex);
+       DeleteCriticalSection (&contexts_mutex);
+       DeleteCriticalSection (&delayed_free_table_mutex);
+       DeleteCriticalSection (&small_id_mutex);
+       CloseHandle (background_change_event);
+#endif
+
+       g_array_free (delayed_free_table, TRUE);
+       delayed_free_table = NULL;
+
+       TlsFree (current_object_key);
+}
+
 void
 mono_threads_install_cleanup (MonoThreadCleanupFunc func)
 {
-       mono_thread_cleanup = func;
+       mono_thread_cleanup_fn = func;
+}
+
+/*
+ * Stores FUNC in THREAD->manage_callback.  No locking is done and the
+ * previous value is simply overwritten.
+ */
+void
+mono_thread_set_manage_callback (MonoThread *thread, MonoThreadManageCallback func)
+{
+       thread->manage_callback = func;
 }
 
-void mono_install_thread_callbacks (MonoThreadCallbacks *callbacks)
+void mono_threads_install_notify_pending_exc (MonoThreadNotifyPendingExcFunc func)
 {
-       mono_thread_callbacks = callbacks;
+       mono_thread_notify_pending_exc_fn = func;
 }
 
 G_GNUC_UNUSED
 static void print_tids (gpointer key, gpointer value, gpointer user)
 {
-       g_message ("Waiting for: %d", GPOINTER_TO_UINT(key));
+       /* GPOINTER_TO_UINT breaks horribly if sizeof(void *) >
+        * sizeof(uint) and a cast to uint would overflow
+        */
+       /* Older versions of glib don't have G_GSIZE_FORMAT, so just
+        * print this as a pointer.
+        */
+       g_message ("Waiting for: %p", key);
 }
 
 struct wait_data 
@@ -1605,14 +2403,13 @@ static void wait_for_tids (struct wait_data *wait, guint32 timeout)
 {
        guint32 i, ret;
        
-       THREAD_DEBUG (g_message(G_GNUC_PRETTY_FUNCTION
-                 ": %d threads to wait for in this batch", wait->num));
+       THREAD_DEBUG (g_message("%s: %d threads to wait for in this batch", __func__, wait->num));
 
        ret=WaitForMultipleObjectsEx(wait->num, wait->handles, TRUE, timeout, FALSE);
 
        if(ret==WAIT_FAILED) {
                /* See the comment in build_wait_tids() */
-               THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": Wait failed"));
+               THREAD_DEBUG (g_message ("%s: Wait failed", __func__));
                return;
        }
        
@@ -1623,9 +2420,10 @@ static void wait_for_tids (struct wait_data *wait, guint32 timeout)
                return;
 
        for(i=0; i<wait->num; i++) {
-               guint32 tid=wait->threads[i]->tid;
+               gsize tid = wait->threads[i]->tid;
                
-               if(mono_g_hash_table_lookup (threads, GUINT_TO_POINTER(tid))!=NULL) {
+               mono_threads_lock ();
+               if(mono_g_hash_table_lookup (threads, (gpointer)tid)!=NULL) {
                        /* This thread must have been killed, because
                         * it hasn't cleaned itself up. (It's just
                         * possible that the thread exited before the
@@ -1637,13 +2435,57 @@ static void wait_for_tids (struct wait_data *wait, guint32 timeout)
                         * same thread.)
                         */
        
-                       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION
-                                  ": cleaning up after thread %d", tid));
+                       mono_threads_unlock ();
+                       THREAD_DEBUG (g_message ("%s: cleaning up after thread %p (%"G_GSIZE_FORMAT")", __func__, wait->threads[i], tid));
                        thread_cleanup (wait->threads[i]);
+               } else {
+                       mono_threads_unlock ();
                }
        }
 }
 
+/*
+ * Waits on the handles collected in WAIT, plus background_change_event
+ * when there is room for it, and returns as soon as any one of them is
+ * signalled (the third argument to WaitForMultipleObjectsEx () is FALSE)
+ * or TIMEOUT expires.
+ *
+ * Unless the wait itself failed, the first WAIT->num handles are closed
+ * before returning; background_change_event is never closed here.  If
+ * the wait result indexes one of the thread handles and that thread's
+ * tid is still present in the threads table, thread_cleanup () is run
+ * on it.
+ *
+ * NOTE(review): on WAIT_FAILED the function returns without closing the
+ * handles in WAIT -- confirm this is intended.
+ */
+static void wait_for_tids_or_state_change (struct wait_data *wait, guint32 timeout)
+{
+       guint32 i, ret, count;
+       
+       THREAD_DEBUG (g_message("%s: %d threads to wait for in this batch", __func__, wait->num));
+
+       /* Add the thread state change event, so it wakes up if a thread changes
+        * to background mode.
+        */
+       count = wait->num;
+       if (count < MAXIMUM_WAIT_OBJECTS) {
+               wait->handles [count] = background_change_event;
+               count++;
+       }
+
+       ret=WaitForMultipleObjectsEx (count, wait->handles, FALSE, timeout, FALSE);
+
+       if(ret==WAIT_FAILED) {
+               /* See the comment in build_wait_tids() */
+               THREAD_DEBUG (g_message ("%s: Wait failed", __func__));
+               return;
+       }
+       
+       for(i=0; i<wait->num; i++)
+               CloseHandle (wait->handles[i]);
+
+       if (ret == WAIT_TIMEOUT)
+               return;
+       
+       /* NOTE(review): ret is used directly as an index into wait->threads,
+        * which presumes WAIT_OBJECT_0 is 0 -- confirm.
+        */
+       if (ret < wait->num) {
+               gsize tid = wait->threads[ret]->tid;
+               mono_threads_lock ();
+               if (mono_g_hash_table_lookup (threads, (gpointer)tid)!=NULL) {
+                       /* See comment in wait_for_tids about thread cleanup */
+                       mono_threads_unlock ();
+                       THREAD_DEBUG (g_message ("%s: cleaning up after thread %"G_GSIZE_FORMAT, __func__, tid));
+                       thread_cleanup (wait->threads [ret]);
+               } else
+                       mono_threads_unlock ();
+       }
+}
+
 static void build_wait_tids (gpointer key, gpointer value, gpointer user)
 {
        struct wait_data *wait=(struct wait_data *)user;
@@ -1653,25 +2495,45 @@ static void build_wait_tids (gpointer key, gpointer value, gpointer user)
                MonoThread *thread=(MonoThread *)value;
 
                /* Ignore background threads, we abort them later */
-               if (thread->state & ThreadState_Background)
+               /* Do not lock here since it is not needed and the caller holds threads_lock */
+               if (thread->state & ThreadState_Background) {
+                       THREAD_DEBUG (g_message ("%s: ignoring background thread %"G_GSIZE_FORMAT, __func__, (gsize)thread->tid));
                        return; /* just leave, ignore */
+               }
                
-               if (mono_gc_is_finalizer_thread (thread))
+               if (mono_gc_is_finalizer_thread (thread)) {
+                       THREAD_DEBUG (g_message ("%s: ignoring finalizer thread %"G_GSIZE_FORMAT, __func__, (gsize)thread->tid));
                        return;
+               }
 
-               if (thread == mono_thread_current ())
+               if (thread == mono_thread_current ()) {
+                       THREAD_DEBUG (g_message ("%s: ignoring current thread %"G_GSIZE_FORMAT, __func__, (gsize)thread->tid));
                        return;
+               }
 
-               if (thread == mono_thread_get_main ())
+               if (thread == mono_thread_get_main ()) {
+                       THREAD_DEBUG (g_message ("%s: ignoring main thread %"G_GSIZE_FORMAT, __func__, (gsize)thread->tid));
                        return;
+               }
 
                handle = OpenThread (THREAD_ALL_ACCESS, TRUE, thread->tid);
-               if (handle == NULL)
+               if (handle == NULL) {
+                       THREAD_DEBUG (g_message ("%s: ignoring unopenable thread %"G_GSIZE_FORMAT, __func__, (gsize)thread->tid));
                        return;
+               }
+               
+               THREAD_DEBUG (g_message ("%s: Invoking mono_thread_manage callback on thread %p", __func__, thread));
+               if ((thread->manage_callback == NULL) || (thread->manage_callback (thread) == TRUE)) {
+                       wait->handles[wait->num]=handle;
+                       wait->threads[wait->num]=thread;
+                       wait->num++;
+
+                       THREAD_DEBUG (g_message ("%s: adding thread %"G_GSIZE_FORMAT, __func__, (gsize)thread->tid));
+               } else {
+                       THREAD_DEBUG (g_message ("%s: ignoring (because of callback) thread %"G_GSIZE_FORMAT, __func__, (gsize)thread->tid));
+               }
+               
                
-               wait->handles[wait->num]=handle;
-               wait->threads[wait->num]=thread;
-               wait->num++;
        } else {
                /* Just ignore the rest, we can't do anything with
                 * them yet
@@ -1683,82 +2545,160 @@ static gboolean
 remove_and_abort_threads (gpointer key, gpointer value, gpointer user)
 {
        struct wait_data *wait=(struct wait_data *)user;
-       guint32 self = GetCurrentThreadId ();
+       gsize self = GetCurrentThreadId ();
        MonoThread *thread = (MonoThread *) value;
        HANDLE handle;
 
-       /* The finalizer thread is not a background thread */
-       if (thread->tid != self && thread->state & ThreadState_Background) {
-       
-               handle = OpenThread (THREAD_ALL_ACCESS, TRUE, thread->tid);
-               if (handle == NULL)
-                       return FALSE;
-               
-               wait->handles[wait->num]=thread->handle;
-               wait->threads[wait->num]=thread;
-               wait->num++;
-       
-               if(thread->state & ThreadState_AbortRequested ||
-                  thread->state & ThreadState_Aborted) {
-                       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": Thread id %d already aborting", thread->tid));
-                       return(TRUE);
+       if (wait->num >= MAXIMUM_WAIT_OBJECTS)
+               return FALSE;
+
+       /* The finalizer thread is not a background thread */
+       if (thread->tid != self && (thread->state & ThreadState_Background) != 0) {
+       
+               handle = OpenThread (THREAD_ALL_ACCESS, TRUE, thread->tid);
+               if (handle == NULL)
+                       return FALSE;
+
+               /* printf ("A: %d\n", wait->num); */
+               wait->handles[wait->num]=thread->handle;
+               wait->threads[wait->num]=thread;
+               wait->num++;
+
+               THREAD_DEBUG (g_print ("%s: Aborting id: %"G_GSIZE_FORMAT"\n", __func__, (gsize)thread->tid));
+               mono_thread_stop (thread);
+               return TRUE;
+       }
+
+       return (thread->tid != self && !mono_gc_is_finalizer_thread (thread)); 
+}
+
+static MonoException* mono_thread_execute_interruption (MonoThread *thread);
+
+/** 
+ * mono_threads_set_shutting_down:
+ *
+ * Is called by a thread that wants to shut down Mono. If the runtime is already
+ * shutting down, the calling thread is suspended/stopped, and this function never
+ * returns.
+ */
+void
+mono_threads_set_shutting_down (void)
+{
+       MonoThread *current_thread = mono_thread_current ();
+
+       mono_threads_lock ();
+
+       if (shutting_down) {
+               mono_threads_unlock ();
+
+               /* Make sure we're properly suspended/stopped */
+
+               EnterCriticalSection (current_thread->synch_cs);
+
+               if ((current_thread->state & ThreadState_SuspendRequested) ||
+                   (current_thread->state & ThreadState_AbortRequested) ||
+                   (current_thread->state & ThreadState_StopRequested)) {
+                       LeaveCriticalSection (current_thread->synch_cs);
+                       mono_thread_execute_interruption (current_thread);
+               } else {
+                       current_thread->state |= ThreadState_Stopped;
+                       LeaveCriticalSection (current_thread->synch_cs);
                }
+
+               /* Wake up other threads potentially waiting for us */
+               ExitThread (0);
+       } else {
+               shutting_down = TRUE;
+
+               /* Not really a background state change, but this will
+                * interrupt the main thread if it is waiting for all
+                * the other threads.
+                */
+               SetEvent (background_change_event);
                
-               THREAD_DEBUG (g_print (G_GNUC_PRETTY_FUNCTION ": Aborting id: %d\n", thread->tid));
-               mono_thread_stop (thread);
-               return TRUE;
+               mono_threads_unlock ();
        }
+}
 
-       return (thread->tid != self && !mono_gc_is_finalizer_thread (thread)); 
+/** 
+ * mono_threads_is_shutting_down:
+ *
+ * Returns whether a thread has commenced shutdown of Mono.  Note that
+ * if the function returns FALSE the caller must not assume that
+ * shutdown is not in progress, because the situation might have
+ * changed since the function returned.  For that reason this function
+ * is of very limited utility.
+ */
+gboolean
+mono_threads_is_shutting_down (void)
+{
+       return shutting_down;
 }
 
 void mono_thread_manage (void)
 {
        struct wait_data *wait=g_new0 (struct wait_data, 1);
-       
+
        /* join each thread that's still running */
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": Joining each running thread..."));
+       THREAD_DEBUG (g_message ("%s: Joining each running thread...", __func__));
        
-       EnterCriticalSection (&threads_mutex);
+       mono_threads_lock ();
        if(threads==NULL) {
-               THREAD_DEBUG (g_message(G_GNUC_PRETTY_FUNCTION ": No threads"));
-               LeaveCriticalSection (&threads_mutex);
+               THREAD_DEBUG (g_message("%s: No threads", __func__));
+               mono_threads_unlock ();
                return;
        }
-       LeaveCriticalSection (&threads_mutex);
+       mono_threads_unlock ();
        
        do {
-               EnterCriticalSection (&threads_mutex);
-               THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION
-                         ":There are %d threads to join", mono_g_hash_table_size (threads));
+               mono_threads_lock ();
+               if (shutting_down) {
+                       /* somebody else is shutting down */
+                       mono_threads_unlock ();
+                       break;
+               }
+               THREAD_DEBUG (g_message ("%s: There are %d threads to join", __func__, mono_g_hash_table_size (threads));
                        mono_g_hash_table_foreach (threads, print_tids, NULL));
        
+               ResetEvent (background_change_event);
                wait->num=0;
                mono_g_hash_table_foreach (threads, build_wait_tids, wait);
-               LeaveCriticalSection (&threads_mutex);
+               mono_threads_unlock ();
                if(wait->num>0) {
                        /* Something to wait for */
-                       wait_for_tids (wait, INFINITE);
+                       wait_for_tids_or_state_change (wait, INFINITE);
                }
+               THREAD_DEBUG (g_message ("%s: I have %d threads after waiting.", __func__, wait->num));
        } while(wait->num>0);
-       
-       mono_thread_pool_cleanup ();
 
-       EnterCriticalSection(&threads_mutex);
+       mono_threads_set_shutting_down ();
+
+       /* No new threads will be created after this point */
+
+       mono_runtime_set_shutting_down ();
+
+       THREAD_DEBUG (g_message ("%s: threadpool cleanup", __func__));
+       mono_thread_pool_cleanup ();
 
        /* 
         * Remove everything but the finalizer thread and self.
         * Also abort all the background threads
         * */
-       wait->num = 0;
-       mono_g_hash_table_foreach_remove (threads, remove_and_abort_threads, wait);
+       do {
+               mono_threads_lock ();
 
-       LeaveCriticalSection(&threads_mutex);
+               wait->num = 0;
+               mono_g_hash_table_foreach_remove (threads, remove_and_abort_threads, wait);
 
-       if(wait->num>0) {
-               /* Something to wait for */
-               wait_for_tids (wait, INFINITE);
-       }
+               mono_threads_unlock ();
+
+               THREAD_DEBUG (g_message ("%s: wait->num is now %d", __func__, wait->num));
+               if(wait->num>0) {
+                       /* Something to wait for */
+                       wait_for_tids (wait, INFINITE);
+               }
+       } while (wait->num > 0);
+       
        /* 
         * give the subthreads a chance to really quit (this is mainly needed
         * to get correct user and system times from getrusage/wait/time(1)).
@@ -1774,35 +2714,42 @@ void mono_thread_manage (void)
 static void terminate_thread (gpointer key, gpointer value, gpointer user)
 {
        MonoThread *thread=(MonoThread *)value;
-       guint32 self=GPOINTER_TO_UINT (user);
        
-       if(thread->tid!=self) {
+       if(thread->tid != (gsize)user) {
                /*TerminateThread (thread->handle, -1);*/
        }
 }
 
+/*
+ * mono_thread_abort_all_other_threads:
+ *
+ *   Walk the threads table under the threads lock and hand every entry to
+ * terminate_thread, passing the id of the calling thread as user data.
+ * NOTE: the only action in terminate_thread (TerminateThread) is commented
+ * out, so at present this call does not stop any thread.
+ */
 void mono_thread_abort_all_other_threads (void)
 {
-       guint32 self=GetCurrentThreadId ();
+       gsize self = GetCurrentThreadId ();
 
-       EnterCriticalSection (&threads_mutex);
-       THREAD_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ":There are %d threads to abort",
-                 mono_g_hash_table_size (threads));
-               mono_g_hash_table_foreach (threads, print_tids, NULL));
+       mono_threads_lock ();
+       THREAD_DEBUG (g_message ("%s: There are %d threads to abort", __func__,
+                                mono_g_hash_table_size (threads));
+                     mono_g_hash_table_foreach (threads, print_tids, NULL));
 
-       mono_g_hash_table_foreach (threads, terminate_thread,
-                                  GUINT_TO_POINTER (self));
+       mono_g_hash_table_foreach (threads, terminate_thread, (gpointer)self);
        
-       LeaveCriticalSection (&threads_mutex);
+       mono_threads_unlock ();
 }
 
 static void
-collect_threads (gpointer key, gpointer value, gpointer user_data)
+collect_threads_for_suspend (gpointer key, gpointer value, gpointer user_data)
 {
        MonoThread *thread = (MonoThread*)value;
        struct wait_data *wait = (struct wait_data*)user_data;
        HANDLE handle;
 
+       /* 
+        * We try to exclude threads early, to avoid running into the MAXIMUM_WAIT_OBJECTS
+        * limitation.
+        * This needs no locking.
+        */
+       if ((thread->state & ThreadState_Suspended) != 0 || 
+               (thread->state & ThreadState_Stopped) != 0)
+               return;
+
        if (wait->num<MAXIMUM_WAIT_OBJECTS) {
                handle = OpenThread (THREAD_ALL_ACCESS, TRUE, thread->tid);
                if (handle == NULL)
@@ -1817,75 +2764,182 @@ collect_threads (gpointer key, gpointer value, gpointer user_data)
 /*
  * mono_thread_suspend_all_other_threads:
  *
- *  Suspend all managed threads except the finalizer thread and this thread.
+ *  Suspend all managed threads except the finalizer thread and this thread. It is
+ * not possible to resume them later.
  */
 void mono_thread_suspend_all_other_threads (void)
 {
        struct wait_data *wait = g_new0 (struct wait_data, 1);
-       int i, waitnum;
-       guint32 self = GetCurrentThreadId ();
+       int i;
+       gsize self = GetCurrentThreadId ();
        gpointer *events;
        guint32 eventidx = 0;
+       gboolean starting, finished;
 
-       /* 
-        * Make a copy of the hashtable since we can't do anything with
-        * threads while threads_mutex is held.
+       /*
+        * The other threads could be in an arbitrary state at this point, i.e.
+        * they could be starting up, shutting down etc. This means that there could be
+        * threads which are not even in the threads hash table yet.
         */
-       EnterCriticalSection (&threads_mutex);
-       mono_g_hash_table_foreach (threads, collect_threads, wait);
-       LeaveCriticalSection (&threads_mutex);
 
-       events = g_new0 (gpointer, wait->num);
-       waitnum = 0;
-       /* Get the suspended events that we'll be waiting for */
-       for (i = 0; i < wait->num; ++i) {
-               MonoThread *thread = wait->threads [i];
+       /* 
+        * First we set a barrier which will be checked by all threads before they
+        * are added to the threads hash table, and they will exit if the flag is set.
+        * This ensures that no threads could be added to the hash later.
+        * We will use shutting_down as the barrier for now.
+        */
+       g_assert (shutting_down);
 
-               if ((thread->tid == self) || mono_gc_is_finalizer_thread (thread)) {
-                       //CloseHandle (wait->handles [i]);
-                       wait->threads [i] = NULL; /* ignore this thread in next loop */
-                       continue;
+       /*
+        * We make multiple calls to WaitForMultipleObjects since:
+        * - we can only wait for MAXIMUM_WAIT_OBJECTS threads
+        * - some threads could exit without becoming suspended
+        */
+       finished = FALSE;
+       while (!finished) {
+               /*
+                * Make a copy of the hashtable since we can't do anything with
+                * threads while threads_mutex is held.
+                */
+               wait->num = 0;
+               mono_threads_lock ();
+               mono_g_hash_table_foreach (threads, collect_threads_for_suspend, wait);
+               mono_threads_unlock ();
+
+               events = g_new0 (gpointer, wait->num);
+               eventidx = 0;
+               /* Get the suspended events that we'll be waiting for */
+               for (i = 0; i < wait->num; ++i) {
+                       MonoThread *thread = wait->threads [i];
+
+                       if ((thread->tid == self) || mono_gc_is_finalizer_thread (thread)) {
+                               //CloseHandle (wait->handles [i]);
+                               wait->threads [i] = NULL; /* ignore this thread in next loop */
+                               continue;
+                       }
+
+                       ensure_synch_cs_set (thread);
+               
+                       EnterCriticalSection (thread->synch_cs);
+
+                       if ((thread->state & ThreadState_Suspended) != 0 || 
+                               (thread->state & ThreadState_SuspendRequested) != 0 ||
+                               (thread->state & ThreadState_StopRequested) != 0 ||
+                               (thread->state & ThreadState_Stopped) != 0) {
+                               LeaveCriticalSection (thread->synch_cs);
+                               CloseHandle (wait->handles [i]);
+                               wait->threads [i] = NULL; /* ignore this thread in next loop */
+                               continue;
+                       }
+
+                       /* Convert abort requests into suspend requests */
+                       if ((thread->state & ThreadState_AbortRequested) != 0)
+                               thread->state &= ~ThreadState_AbortRequested;
+                       
+                       thread->state |= ThreadState_SuspendRequested;
+
+                       if (thread->suspended_event == NULL) {
+                               thread->suspended_event = CreateEvent (NULL, TRUE, FALSE, NULL);
+                               if (thread->suspended_event == NULL) {
+                                       /* Forget this one and go on to the next */
+                                       LeaveCriticalSection (thread->synch_cs);
+                                       continue;
+                               }
+                       }
+
+                       events [eventidx++] = thread->suspended_event;
+                       LeaveCriticalSection (thread->synch_cs);
+
+                       /* Signal the thread to suspend */
+                       signal_thread_state_change (thread);
                }
 
-               mono_monitor_enter (thread->synch_lock);
-
-               if ((thread->state & ThreadState_Suspended) != 0 || 
-                       (thread->state & ThreadState_SuspendRequested) != 0 ||
-                       (thread->state & ThreadState_StopRequested) != 0 ||
-                       (thread->state & ThreadState_Stopped) != 0) {
-                       mono_monitor_exit (thread->synch_lock);
-                       CloseHandle (wait->handles [i]);
-                       wait->threads [i] = NULL; /* ignore this thread in next loop */
-                       continue;
+               if (eventidx > 0) {
+                       WaitForMultipleObjectsEx (eventidx, events, TRUE, 100, FALSE);
+                       for (i = 0; i < wait->num; ++i) {
+                               MonoThread *thread = wait->threads [i];
+
+                               if (thread == NULL)
+                                       continue;
+                       
+                               EnterCriticalSection (thread->synch_cs);
+                               if ((thread->state & ThreadState_Suspended) != 0) {
+                                       CloseHandle (thread->suspended_event);
+                                       thread->suspended_event = NULL;
+                               }
+                               LeaveCriticalSection (thread->synch_cs);
+                       }
+               } else {
+                       /* 
+                        * If there are threads which are starting up, we wait until they
+                        * are suspended when they try to register in the threads hash.
+                        * This is guaranteed to finish, since the threads which can create new
+                        * threads get suspended after a while.
+                        * FIXME: The finalizer thread can still create new threads.
+                        */
+                       mono_threads_lock ();
+                       starting = mono_g_hash_table_size (threads_starting_up) > 0;
+                       mono_threads_unlock ();
+                       if (starting)
+                               Sleep (100);
+                       else
+                               finished = TRUE;
                }
 
-               thread->state |= ThreadState_SuspendRequested;
+               g_free (events);
+       }
+
+       g_free (wait);
+}
 
-               if (thread->suspended_event == NULL)
-                       thread->suspended_event = CreateEvent (NULL, TRUE, FALSE, NULL);
+static void
+collect_threads (gpointer key, gpointer value, gpointer user_data)
+{
+       MonoThread *thread = (MonoThread*)value;
+       struct wait_data *wait = (struct wait_data*)user_data;
+       HANDLE handle;
 
-               events [eventidx++] = thread->suspended_event;
-               mono_monitor_exit (thread->synch_lock);
+       if (wait->num<MAXIMUM_WAIT_OBJECTS) {
+               handle = OpenThread (THREAD_ALL_ACCESS, TRUE, thread->tid);
+               if (handle == NULL)
+                       return;
 
-               /* Signal the thread to suspend */
-               signal_thread_state_change (thread);
+               wait->handles [wait->num] = handle;
+               wait->threads [wait->num] = thread;
+               wait->num++;
        }
+}
+
+/**
+ * mono_threads_request_thread_dump:
+ *
+ *   Ask all threads except the current one and the finalizer thread to print
+ * their stacktrace to stdout. A thread that already has a dump request pending
+ * is not signalled again.
+ * The threads are gathered with collect_threads, which stops collecting once
+ * MAXIMUM_WAIT_OBJECTS entries have been stored.
+ */
+void
+mono_threads_request_thread_dump (void)
+{
+       struct wait_data *wait = g_new0 (struct wait_data, 1);
+       int i;
+
+       /* 
+        * Make a copy of the hashtable since we can't do anything with
+        * threads while threads_mutex is held.
+        */
+       mono_threads_lock ();
+       mono_g_hash_table_foreach (threads, collect_threads, wait);
+       mono_threads_unlock ();
 
-       WaitForMultipleObjectsEx (eventidx, events, TRUE, INFINITE, FALSE);
        for (i = 0; i < wait->num; ++i) {
                MonoThread *thread = wait->threads [i];
 
-               if (thread == NULL)
-                       continue;
+               if (!mono_gc_is_finalizer_thread (thread) && (thread != mono_thread_current ()) && !thread->thread_dump_requested) {
+                       thread->thread_dump_requested = TRUE;
 
-               mono_monitor_enter (thread->synch_lock);
-               CloseHandle (thread->suspended_event);
-               thread->suspended_event = NULL;
-               mono_monitor_exit (thread->synch_lock);
-       }
+                       signal_thread_state_change (thread);
+               }
 
-       g_free (events);
-       g_free (wait);
+               /* collect_threads opened this handle with OpenThread; release it */
+               CloseHandle (wait->handles [i]);
+       }
+
+       /*
+        * The snapshot was allocated with g_new0 above and nothing else keeps a
+        * reference to it, so it has to be released here or it leaks on every call.
+        */
+       g_free (wait);
 }
 
 /*
@@ -1901,10 +2955,10 @@ mono_thread_push_appdomain_ref (MonoDomain *domain)
        MonoThread *thread = mono_thread_current ();
 
        if (thread) {
-               /* printf ("PUSH REF: %x -> %s.\n", thread->tid, domain->friendly_name); */
-               EnterCriticalSection (&threads_mutex);
+               /* printf ("PUSH REF: %"G_GSIZE_FORMAT" -> %s.\n", (gsize)thread->tid, domain->friendly_name); */
+               mono_threads_lock ();
                thread->appdomain_refs = g_slist_prepend (thread->appdomain_refs, domain);
-               LeaveCriticalSection (&threads_mutex);
+               mono_threads_unlock ();
        }
 }
 
@@ -1914,12 +2968,12 @@ mono_thread_pop_appdomain_ref (void)
        MonoThread *thread = mono_thread_current ();
 
        if (thread) {
-               /* printf ("POP REF: %x -> %s.\n", thread->tid, ((MonoDomain*)(thread->appdomain_refs->data))->friendly_name); */
-               EnterCriticalSection (&threads_mutex);
+               /* printf ("POP REF: %"G_GSIZE_FORMAT" -> %s.\n", (gsize)thread->tid, ((MonoDomain*)(thread->appdomain_refs->data))->friendly_name); */
+               mono_threads_lock ();
                /* FIXME: How can the list be empty ? */
                if (thread->appdomain_refs)
                        thread->appdomain_refs = g_slist_remove (thread->appdomain_refs, thread->appdomain_refs->data);
-               LeaveCriticalSection (&threads_mutex);
+               mono_threads_unlock ();
        }
 }
 
@@ -1927,9 +2981,9 @@ gboolean
 mono_thread_has_appdomain_ref (MonoThread *thread, MonoDomain *domain)
 {
        gboolean res;
-       EnterCriticalSection (&threads_mutex);
+       mono_threads_lock ();
        res = g_slist_find (thread->appdomain_refs, domain) != NULL;
-       LeaveCriticalSection (&threads_mutex);
+       mono_threads_unlock ();
        return res;
 }
 
@@ -1947,13 +3001,13 @@ abort_appdomain_thread (gpointer key, gpointer value, gpointer user_data)
 
        if (mono_thread_has_appdomain_ref (thread, domain)) {
                /* printf ("ABORTING THREAD %p BECAUSE IT REFERENCES DOMAIN %s.\n", thread->tid, domain->friendly_name); */
-               HANDLE handle = OpenThread (THREAD_ALL_ACCESS, TRUE, thread->tid);
-               if (handle == NULL)
-                       return;
 
                ves_icall_System_Threading_Thread_Abort (thread, NULL);
 
                if(data->wait.num<MAXIMUM_WAIT_OBJECTS) {
+                       HANDLE handle = OpenThread (THREAD_ALL_ACCESS, TRUE, thread->tid);
+                       if (handle == NULL)
+                               return;
                        data->wait.handles [data->wait.num] = handle;
                        data->wait.threads [data->wait.num] = thread;
                        data->wait.num++;
@@ -1975,31 +3029,36 @@ mono_threads_abort_appdomain_threads (MonoDomain *domain, int timeout)
 {
        abort_appdomain_data user_data;
        guint32 start_time;
+       int orig_timeout = timeout;
 
-       /* printf ("ABORT BEGIN.\n"); */
+       THREAD_DEBUG (g_message ("%s: starting abort", __func__));
 
-       start_time = GetTickCount ();
+       start_time = mono_msec_ticks ();
        do {
-               EnterCriticalSection (&threads_mutex);
+               mono_threads_lock ();
 
                user_data.domain = domain;
                user_data.wait.num = 0;
                mono_g_hash_table_foreach (threads, abort_appdomain_thread, &user_data);
-               LeaveCriticalSection (&threads_mutex);
+               mono_threads_unlock ();
 
                if (user_data.wait.num > 0)
-                       wait_for_tids (&user_data.wait, timeout);
+                       /*
+                        * We should wait for the threads either to abort, or to leave the
+                        * domain. We can't do the latter, so we wait with a timeout.
+                        */
+                       wait_for_tids (&user_data.wait, 100);
 
                /* Update remaining time */
-               timeout -= GetTickCount () - start_time;
-               start_time = GetTickCount ();
+               timeout -= mono_msec_ticks () - start_time;
+               start_time = mono_msec_ticks ();
 
-               if (timeout < 0)
+               if (orig_timeout != -1 && timeout < 0)
                        return FALSE;
        }
        while (user_data.wait.num > 0);
 
-       /* printf ("ABORT DONE.\n"); */
+       THREAD_DEBUG (g_message ("%s: abort done", __func__));
 
        return TRUE;
 }
@@ -2012,17 +3071,13 @@ clear_cached_culture (gpointer key, gpointer value, gpointer user_data)
        int i;
 
        /* No locking needed here */
+       /* FIXME: why no locking? writes to the cache are protected with synch_cs above */
 
-       if (thread->culture_info) {
-               for (i = 0; i < NUM_CACHED_CULTURES; ++i) {
-                       if (thread->culture_info [i] && thread->culture_info [i]->vtable->domain == domain)
-                               thread->culture_info [i] = NULL;
-               }
-       }
-       if (thread->ui_culture_info) {
-               for (i = 0; i < NUM_CACHED_CULTURES; ++i) {
-                       if (thread->ui_culture_info [i] && thread->ui_culture_info [i]->vtable->domain == domain)
-                               thread->ui_culture_info [i] = NULL;
+       if (thread->cached_culture_info) {
+               for (i = 0; i < NUM_CACHED_CULTURES * 2; ++i) {
+                       MonoObject *obj = mono_array_get (thread->cached_culture_info, MonoObject*, i);
+                       if (obj && obj->vtable->domain == domain)
+                               mono_array_set (thread->cached_culture_info, MonoObject*, i, NULL);
                }
        }
 }
@@ -2036,19 +3091,19 @@ clear_cached_culture (gpointer key, gpointer value, gpointer user_data)
 void
 mono_threads_clear_cached_culture (MonoDomain *domain)
 {
-       EnterCriticalSection (&threads_mutex);
+       mono_threads_lock ();
        mono_g_hash_table_foreach (threads, clear_cached_culture, domain);
-       LeaveCriticalSection (&threads_mutex);
+       mono_threads_unlock ();
 }
 
 /*
- * mono_thread_get_pending_exception:
+ * mono_thread_get_undeniable_exception:
  *
  *   Return an exception which needs to be raised when leaving a catch clause.
  * This is used for undeniable exception propagation.
  */
 MonoException*
-mono_thread_get_pending_exception (void)
+mono_thread_get_undeniable_exception (void)
 {
        MonoThread *thread = mono_thread_current ();
 
@@ -2107,6 +3162,7 @@ static void mono_init_static_data_info (StaticDataInfo *static_data)
 {
        static_data->idx = 0;
        static_data->offset = 0;
+       static_data->freelist = NULL;
 }
 
 /*
@@ -2132,15 +3188,6 @@ mono_alloc_static_data_slot (StaticDataInfo *static_data, guint32 size, guint32
        if (static_data->offset + size >= static_data_size [static_data->idx]) {
                static_data->idx ++;
                g_assert (size <= static_data_size [static_data->idx]);
-               /* 
-                * massive unloading and reloading of domains with thread-static
-                * data may eventually exceed the allocated storage...
-                * Need to check what the MS runtime does in that case.
-                * Note that for each appdomain, we need to allocate a separate
-                * thread data slot for security reasons. We could keep track
-                * of the slots per-domain and when the domain is unloaded
-                * out the slots on a sort of free list.
-                */
                g_assert (static_data->idx < NUM_STATIC_DATA_IDX);
                static_data->offset = 0;
        }
@@ -2158,13 +3205,13 @@ thread_adjust_static_data (MonoThread *thread)
 {
        guint32 offset;
 
-       EnterCriticalSection (&threads_mutex);
+       mono_threads_lock ();
        if (thread_static_info.offset || thread_static_info.idx > 0) {
                /* get the current allocated size */
                offset = thread_static_info.offset | ((thread_static_info.idx + 1) << 24);
                mono_alloc_static_data (&(thread->static_data), offset);
        }
-       LeaveCriticalSection (&threads_mutex);
+       mono_threads_unlock ();
 }
 
 static void 
@@ -2176,6 +3223,24 @@ alloc_thread_static_data_helper (gpointer key, gpointer value, gpointer user)
        mono_alloc_static_data (&(thread->static_data), offset);
 }
 
+/*
+ * search_tls_slot_in_freelist:
+ *
+ *   Look through the freelist of STATIC_DATA for a previously released slot
+ * whose size is exactly SIZE. The first match is unlinked from the list and
+ * returned; since it is no longer on the list, the caller is responsible for
+ * the node from then on. Returns NULL when no slot of that size is cached.
+ * ALIGN is not consulted: slots are matched on size alone.
+ */
+static MonoThreadDomainTls*
+search_tls_slot_in_freelist (StaticDataInfo *static_data, guint32 size, guint32 align)
+{
+       MonoThreadDomainTls* prev = NULL;
+       MonoThreadDomainTls* tmp = static_data->freelist;
+       while (tmp) {
+               if (tmp->size == size) {
+                       if (prev)
+                               prev->next = tmp->next;
+                       else
+                               static_data->freelist = tmp->next;
+                       return tmp;
+               }
+               /*
+                * Keep prev one node behind tmp. Without this, a match that is
+                * not at the head would be unlinked by resetting the list head,
+                * which drops (and leaks) every node in front of it.
+                */
+               prev = tmp;
+               tmp = tmp->next;
+       }
+       return NULL;
+}
+
 /*
  * The offset for a special static variable is composed of three parts:
  * a bit that indicates the type of static data (0:thread, 1:context),
@@ -2190,19 +3255,27 @@ mono_alloc_special_static_data (guint32 static_type, guint32 size, guint32 align
        guint32 offset;
        if (static_type == SPECIAL_STATIC_THREAD)
        {
-               EnterCriticalSection (&threads_mutex);
-               offset = mono_alloc_static_data_slot (&thread_static_info, size, align);
+               MonoThreadDomainTls *item;
+               mono_threads_lock ();
+               item = search_tls_slot_in_freelist (&thread_static_info, size, align);
+               /*g_print ("TLS alloc: %d in domain %p (total: %d), cached: %p\n", size, mono_domain_get (), thread_static_info.offset, item);*/
+               if (item) {
+                       offset = item->offset;
+                       g_free (item);
+               } else {
+                       offset = mono_alloc_static_data_slot (&thread_static_info, size, align);
+               }
                /* This can be called during startup */
                if (threads != NULL)
                        mono_g_hash_table_foreach (threads, alloc_thread_static_data_helper, GUINT_TO_POINTER (offset));
-               LeaveCriticalSection (&threads_mutex);
+               mono_threads_unlock ();
        }
        else
        {
                g_assert (static_type == SPECIAL_STATIC_CONTEXT);
-               EnterCriticalSection (&contexts_mutex);
+               mono_contexts_lock ();
                offset = mono_alloc_static_data_slot (&context_static_info, size, align);
-               LeaveCriticalSection (&contexts_mutex);
+               mono_contexts_unlock ();
                offset |= 0x80000000;   /* Set the high bit to indicate context static data */
        }
        return offset;
@@ -2231,70 +3304,133 @@ mono_get_special_static_data (guint32 offset)
                */
                MonoAppContext *context = mono_context_get ();
                if (!context->static_data || !context->static_data [idx]) {
-                       EnterCriticalSection (&contexts_mutex);
+                       mono_contexts_lock ();
                        mono_alloc_static_data (&(context->static_data), offset);
-                       LeaveCriticalSection (&contexts_mutex);
+                       mono_contexts_unlock ();
                }
                return ((char*) context->static_data [idx]) + (offset & 0xffffff);      
        }
 }
 
-static void gc_stop_world (gpointer key, gpointer value, gpointer user)
-{
-       MonoThread *thread=(MonoThread *)value;
-       guint32 self=GPOINTER_TO_UINT (user);
+/* Argument passed to free_thread_static_data_helper: the thread-static slot to wipe. */
+typedef struct {
+       guint32 offset; /* encoded slot: (chunk index + 1) in bits 24 and up, byte offset in the low 24 bits */
+       guint32 size;   /* number of bytes to clear at that slot */
+} TlsOffsetSize;
 
-       LIBGC_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": %d - %d", self, thread->tid));
+static void 
+free_thread_static_data_helper (gpointer key, gpointer value, gpointer user)
+{
+       MonoThread *thread = value;
+       TlsOffsetSize *data = user;
+       int idx = (data->offset >> 24) - 1;
+       char *ptr;
 
-       if(thread->tid==self)
+       if (!thread->static_data || !thread->static_data [idx])
                return;
-
-       SuspendThread (thread->handle);
+       ptr = ((char*) thread->static_data [idx]) + (data->offset & 0xffffff);
+       memset (ptr, 0, data->size);
 }
 
-void mono_gc_stop_world (void)
+/*
+ * do_free_special:
+ *
+ *   Hash table callback: KEY is a MonoClassField*, VALUE is the special static
+ * offset recorded for that field (stored as a pointer-sized integer) and DATA
+ * is not used.
+ * For a thread static offset (high bit clear) the slot is zeroed in every
+ * thread of the threads table that has the corresponding chunk allocated, and
+ * a node describing the slot is pushed on thread_static_info.freelist.
+ * Context static offsets (high bit set) are currently left untouched.
+ */
+static void
+do_free_special (gpointer key, gpointer value, gpointer data)
 {
-       guint32 self=GetCurrentThreadId ();
+       MonoClassField *field = key;
+       guint32 offset = GPOINTER_TO_UINT (value);
+       /* high bit set: context static data, high bit clear: thread static data */
+       guint32 static_type = (offset & 0x80000000);
+       gint32 align;
+       guint32 size;
+       size = mono_type_size (field->type, &align);
+       /*g_print ("free %s , size: %d, offset: %x\n", field->name, size, offset);*/
+       if (static_type == 0) {
+               /* NOTE: this local shadows the (unused) 'data' parameter */
+               TlsOffsetSize data;
+               MonoThreadDomainTls *item = g_new0 (MonoThreadDomainTls, 1);
+               data.offset = offset & 0x7fffffff;
+               data.size = size;
+               /* wipe the slot in the threads that have it, so no stale value remains */
+               if (threads != NULL)
+                       mono_g_hash_table_foreach (threads, free_thread_static_data_helper, &data);
+               /* push the slot on the head of the freelist */
+               item->offset = offset;
+               item->size = size;
+               item->next = thread_static_info.freelist;
+               thread_static_info.freelist = item;
+       } else {
+               /* FIXME: free context static data as well */
+       }
+}
 
-       LIBGC_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": %d - %p", self, threads));
+/*
+ * mono_alloc_special_static_data_free:
+ *
+ *   Call do_free_special on every entry of SPECIAL_STATIC_FIELDS. The entries
+ * are interpreted by do_free_special as MonoClassField* keys with their special
+ * static offset as value.
+ * The threads lock is held for the whole walk, because do_free_special iterates
+ * the threads table and modifies thread_static_info.freelist.
+ */
+void
+mono_alloc_special_static_data_free (GHashTable *special_static_fields)
+{
+       mono_threads_lock ();
+       g_hash_table_foreach (special_static_fields, do_free_special, NULL);
+       mono_threads_unlock ();
+}
 
-       EnterCriticalSection (&threads_mutex);
+static MonoClassField *local_slots = NULL;
 
-       if (threads != NULL)
-               mono_g_hash_table_foreach (threads, gc_stop_world, GUINT_TO_POINTER (self));
-       
-       LeaveCriticalSection (&threads_mutex);
-}
+typedef struct {
+       /* local tls data to get locals_slot from a thread */
+       guint32 offset;
+       int idx;
+       /* index in the locals_slot array */
+       int slot;
+} LocalSlotID;
 
-static void gc_start_world (gpointer key, gpointer value, gpointer user)
+static void
+clear_local_slot (gpointer key, gpointer value, gpointer user_data)
 {
-       MonoThread *thread=(MonoThread *)value;
-       guint32 self=GPOINTER_TO_UINT (user);
-       
-       LIBGC_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": %d - %d", self, thread->tid));
-
-       if(thread->tid==self)
+       LocalSlotID *sid = user_data;
+       MonoThread *thread = (MonoThread*)value;
+       MonoArray *slots_array;
+       /*
+        * the static field is stored at: ((char*) thread->static_data [idx]) + (offset & 0xffffff);
+        * it is for the right domain, so we need to check if it is allocated and initialized
+        * for the current thread.
+        */
+       /*g_print ("handling thread %p\n", thread);*/
+       if (!thread->static_data || !thread->static_data [sid->idx])
                return;
-
-       ResumeThread (thread->handle);
+       slots_array = *(MonoArray **)(((char*) thread->static_data [sid->idx]) + (sid->offset & 0xffffff));
+       if (!slots_array || sid->slot >= mono_array_length (slots_array))
+               return;
+       mono_array_set (slots_array, MonoObject*, sid->slot, NULL);
 }
 
-void mono_gc_start_world (void)
+void
+mono_thread_free_local_slot_values (int slot, MonoBoolean thread_local)
 {
-       guint32 self=GetCurrentThreadId ();
-
-       LIBGC_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": %d - %p", self, threads));
-
-       EnterCriticalSection (&threads_mutex);
-
-       if (threads != NULL)
-               mono_g_hash_table_foreach (threads, gc_start_world, GUINT_TO_POINTER (self));
-       
-       LeaveCriticalSection (&threads_mutex);
+       MonoDomain *domain;
+       LocalSlotID sid;
+       sid.slot = slot;
+       if (thread_local) {
+               void *addr = NULL;
+               if (!local_slots) {
+                       local_slots = mono_class_get_field_from_name (mono_defaults.thread_class, "local_slots");
+                       if (!local_slots) {
+                               g_warning ("local_slots field not found in Thread class");
+                               return;
+                       }
+               }
+               domain = mono_domain_get ();
+               mono_domain_lock (domain);
+               if (domain->special_static_fields)
+                       addr = g_hash_table_lookup (domain->special_static_fields, local_slots);
+               mono_domain_unlock (domain);
+               if (!addr)
+                       return;
+               /*g_print ("freeing slot %d at %p\n", slot, addr);*/
+               sid.offset = GPOINTER_TO_UINT (addr);
+               sid.offset &= 0x7fffffff;
+               sid.idx = (sid.offset >> 24) - 1;
+               mono_threads_lock ();
+               mono_g_hash_table_foreach (threads, clear_local_slot, &sid);
+               mono_threads_unlock ();
+       } else {
+               /* FIXME: clear the slot for MonoAppContexts, too */
+       }
 }
 
-#ifdef __MINGW32__
-static CALLBACK void dummy_apc (ULONG_PTR param)
+#ifdef PLATFORM_WIN32
+static void CALLBACK dummy_apc (ULONG_PTR param)
 {
 }
 #else
@@ -2312,7 +3448,9 @@ static guint32 dummy_apc (gpointer param)
  */
 static MonoException* mono_thread_execute_interruption (MonoThread *thread)
 {
-       mono_monitor_enter (thread->synch_lock);
+       ensure_synch_cs_set (thread);
+       
+       EnterCriticalSection (thread->synch_cs);
 
        if (thread->interruption_requested) {
                /* this will consume pending APC calls */
@@ -2323,21 +3461,33 @@ static MonoException* mono_thread_execute_interruption (MonoThread *thread)
 
        if ((thread->state & ThreadState_AbortRequested) != 0) {
                if (thread->abort_exc == NULL)
-                       thread->abort_exc = mono_get_exception_thread_abort ();
-               mono_monitor_exit (thread->synch_lock);
+                       MONO_OBJECT_SETREF (thread, abort_exc, mono_get_exception_thread_abort ());
+               LeaveCriticalSection (thread->synch_cs);
                return thread->abort_exc;
        }
        else if ((thread->state & ThreadState_SuspendRequested) != 0) {
                thread->state &= ~ThreadState_SuspendRequested;
                thread->state |= ThreadState_Suspended;
                thread->suspend_event = CreateEvent (NULL, TRUE, FALSE, NULL);
+               if (thread->suspend_event == NULL) {
+                       LeaveCriticalSection (thread->synch_cs);
+                       return(NULL);
+               }
                if (thread->suspended_event)
                        SetEvent (thread->suspended_event);
-               mono_monitor_exit (thread->synch_lock);
+
+               LeaveCriticalSection (thread->synch_cs);
+
+               if (shutting_down) {
+                       /* After we left the lock, the runtime might shut down so everything becomes invalid */
+                       for (;;)
+                               Sleep (1000);
+               }
                
                WaitForSingleObject (thread->suspend_event, INFINITE);
                
-               mono_monitor_enter (thread->synch_lock);
+               EnterCriticalSection (thread->synch_cs);
+
                CloseHandle (thread->suspend_event);
                thread->suspend_event = NULL;
                thread->state &= ~ThreadState_Suspended;
@@ -2346,17 +3496,28 @@ static MonoException* mono_thread_execute_interruption (MonoThread *thread)
                 * and will be waiting for it
                 */
                SetEvent (thread->resume_event);
-               mono_monitor_exit (thread->synch_lock);
+
+               LeaveCriticalSection (thread->synch_cs);
+               
                return NULL;
        }
        else if ((thread->state & ThreadState_StopRequested) != 0) {
                /* FIXME: do this through the JIT? */
-               mono_monitor_exit (thread->synch_lock);
+
+               LeaveCriticalSection (thread->synch_cs);
+               
                mono_thread_exit ();
                return NULL;
+       } else if (thread->thread_interrupt_requested) {
+
+               thread->thread_interrupt_requested = FALSE;
+               LeaveCriticalSection (thread->synch_cs);
+               
+               return(mono_get_exception_thread_interrupted ());
        }
        
-       mono_monitor_exit (thread->synch_lock);
+       LeaveCriticalSection (thread->synch_cs);
+       
        return NULL;
 }
 
@@ -2368,7 +3529,8 @@ static MonoException* mono_thread_execute_interruption (MonoThread *thread)
  * the thread. If the result is an exception that needs to be throw, it is 
  * provided as return value.
  */
-MonoException* mono_thread_request_interruption (gboolean running_managed)
+MonoException*
+mono_thread_request_interruption (gboolean running_managed)
 {
        MonoThread *thread = mono_thread_current ();
 
@@ -2376,34 +3538,28 @@ MonoException* mono_thread_request_interruption (gboolean running_managed)
        if (thread == NULL) 
                return NULL;
        
-       mono_monitor_enter (thread->synch_lock);
-       
-       if (thread->interruption_requested) {
-               mono_monitor_exit (thread->synch_lock);
+       if (InterlockedCompareExchange (&thread->interruption_requested, 1, 0) == 1)
                return NULL;
-       }
 
        if (!running_managed || is_running_protected_wrapper ()) {
                /* Can't stop while in unmanaged code. Increase the global interruption
                   request count. When exiting the unmanaged method the count will be
                   checked and the thread will be interrupted. */
-
+               
                InterlockedIncrement (&thread_interruption_requested);
-               thread->interruption_requested = TRUE;
-               mono_monitor_exit (thread->synch_lock);
+
+               if (mono_thread_notify_pending_exc_fn && !running_managed)
+                       /* The JIT will notify the thread about the interruption */
+                       /* This shouldn't take any locks */
+                       mono_thread_notify_pending_exc_fn ();
 
                /* this will awake the thread if it is in WaitForSingleObject 
                   or similar */
+               /* Our implementation of this function ignores the func argument */
                QueueUserAPC ((PAPCFUNC)dummy_apc, thread->handle, NULL);
-               /* Someone is waiting for this thread to be suspended */
-               if (mono_runtime_is_shutting_down () && thread->suspended_event) {
-                       return mono_thread_execute_interruption (thread);
-               }
-               
                return NULL;
        }
        else {
-               mono_monitor_exit (thread->synch_lock);
                return mono_thread_execute_interruption (thread);
        }
 }
@@ -2426,7 +3582,9 @@ static void mono_thread_interruption_checkpoint_request (gboolean bypass_abort_p
        /* The thread may already be stopping */
        if (thread == NULL)
                return;
-       
+
+       mono_debugger_check_interruption ();
+
        if (thread->interruption_requested && (bypass_abort_protection || !is_running_protected_wrapper ())) {
                MonoException* exc = mono_thread_execute_interruption (thread);
                if (exc) mono_raise_exception (exc);
@@ -2450,6 +3608,63 @@ void mono_thread_force_interruption_checkpoint ()
        mono_thread_interruption_checkpoint_request (TRUE);
 }
 
+/*
+ * mono_thread_get_and_clear_pending_exception:
+ *
+ *   Return the exception pending for the current thread, or NULL if there is none, and clear it as a side effect.
+ */
+MonoException*
+mono_thread_get_and_clear_pending_exception (void)
+{
+       MonoThread *thread = mono_thread_current ();
+
+       /* The thread may already be stopping */
+       if (thread == NULL)
+               return NULL;
+
+       if (thread->interruption_requested && !is_running_protected_wrapper ()) { /* a requested interruption is handled first; pending_exception is left untouched on this path */
+               return mono_thread_execute_interruption (thread);
+       }
+       
+       if (thread->pending_exception) {
+               MonoException *exc = thread->pending_exception;
+
+               thread->pending_exception = NULL; /* clear the slot so the same exception is not returned twice */
+               return exc;
+       }
+
+       return NULL;
+}
+
+/*
+ * mono_set_pending_exception:
+ *
+ *   Set the pending exception of the current thread to EXC. On platforms which
+ * support it (mono_thread_notify_pending_exc_fn is set), the exception will be
+ * thrown when execution returns to managed code. On other platforms, this function
+ * is equivalent to mono_raise_exception (). Internal calls which report exceptions
+ * using this function instead of mono_raise_exception () might be called by JITted
+ * code using a more efficient calling convention.
+ */
+void
+mono_set_pending_exception (MonoException *exc)
+{
+       MonoThread *thread = mono_thread_current ();
+
+       /* The thread may already be stopping; EXC is then dropped without being raised */
+       if (thread == NULL)
+               return;
+
+       if (mono_thread_notify_pending_exc_fn) {
+               MONO_OBJECT_SETREF (thread, pending_exception, exc); /* store EXC before invoking the notification callback */
+
+               mono_thread_notify_pending_exc_fn ();
+       } else {
+               /* No way to notify the JIT about the exception, have to throw it now */
+               mono_raise_exception (exc);
+       }
+}
+
 /**
  * mono_thread_interruption_request_flag:
  *
@@ -2464,40 +3679,74 @@ gint32* mono_thread_interruption_request_flag ()
        return &thread_interruption_requested;
 }
 
-#ifdef WITH_INCLUDED_LIBGC
-
-static void gc_push_all_stacks (gpointer key, gpointer value, gpointer user)
+void 
+mono_thread_init_apartment_state (void)
 {
-       MonoThread *thread=(MonoThread *)value;
-       guint32 *selfp=(guint32 *)user, self = *selfp;
+       MonoThread* thread;
+       thread = mono_thread_current (); /* NOTE(review): not NULL-checked here, unlike mono_thread_cleanup_apartment_state - confirm callers guarantee a current thread */
 
-       LIBGC_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": %d - %d - %p", self, thread->tid, thread->stack_ptr));
-
-       if(thread->tid==self) {
-               LIBGC_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": %p - %p", selfp, thread->stack_ptr));
-               GC_push_all_stack (selfp, thread->stack_ptr);
-               return;
+#ifdef PLATFORM_WIN32
+       /* Initialize COM with the thread's apartment model. A non-negative
+        * result indicates success: S_OK on the first CoInitialize call, or
+        * S_FALSE if already called with the same threading model. A negative
+        * result indicates failure (probably an attempt to change the
+        * threading model); apartment_state is then reset to Unknown.
+        */
+       if (CoInitializeEx(NULL, (thread->apartment_state == ThreadApartmentState_STA) 
+                       ? COINIT_APARTMENTTHREADED 
+                       : COINIT_MULTITHREADED) < 0) {
+               thread->apartment_state = ThreadApartmentState_Unknown;
        }
+#endif
+}
 
+void 
+mono_thread_cleanup_apartment_state (void)
+{
 #ifdef PLATFORM_WIN32
-       GC_win32_push_thread_stack (thread->handle, thread->stack_ptr);
-#else
-       mono_wapi_push_thread_stack (thread->handle, thread->stack_ptr);
+       MonoThread* thread;
+       thread = mono_thread_current ();
+
+       if (thread && thread->apartment_state != ThreadApartmentState_Unknown) { /* mono_thread_init_apartment_state stores Unknown when CoInitializeEx fails */
+               CoUninitialize (); /* only reached for a current thread whose apartment_state is not Unknown */
+       }
 #endif
 }
 
-void mono_gc_push_all_stacks (void)
+void
+mono_thread_set_state (MonoThread *thread, MonoThreadState state)
+{
+       ensure_synch_cs_set (thread); /* presumably makes sure thread->synch_cs exists before it is locked - confirm against its definition */
+       
+       EnterCriticalSection (thread->synch_cs);
+       thread->state |= state; /* OR the STATE bits into thread->state while holding synch_cs */
+       LeaveCriticalSection (thread->synch_cs);
+}
+
+void
+mono_thread_clr_state (MonoThread *thread, MonoThreadState state)
 {
-       guint32 self=GetCurrentThreadId ();
+       ensure_synch_cs_set (thread); /* presumably makes sure thread->synch_cs exists before it is locked - confirm against its definition */
+       
+       EnterCriticalSection (thread->synch_cs);
+       thread->state &= ~state; /* clear the STATE bits in thread->state while holding synch_cs */
+       LeaveCriticalSection (thread->synch_cs);
+}
 
-       LIBGC_DEBUG (g_message (G_GNUC_PRETTY_FUNCTION ": %d - %p", self, threads));
+gboolean
+mono_thread_test_state (MonoThread *thread, MonoThreadState test)
+{
+       gboolean ret = FALSE;
 
-       EnterCriticalSection (&threads_mutex);
+       ensure_synch_cs_set (thread); /* presumably makes sure thread->synch_cs exists before it is locked - confirm against its definition */
+       
+       EnterCriticalSection (thread->synch_cs);
 
-       if (threads != NULL)
-               mono_g_hash_table_foreach (threads, gc_push_all_stacks, &self);
+       if ((thread->state & test) != 0) { /* TRUE when ANY bit of TEST is set in thread->state, not necessarily all of them */
+               ret = TRUE;
+       }
+       
+       LeaveCriticalSection (thread->synch_cs);
        
-       LeaveCriticalSection (&threads_mutex);
+       return ret;
 }
-
-#endif /* WITH_INCLUDED_LIBGC */