Disable managed string alloc on amd64 for now.
[mono.git] / mono / metadata / threadpool.c
index 561ed1afdea31477f8a62836ef3647dbeefae1d2..f22c80c4ed049fd03b708d0801edfb8f20e30e36 100644 (file)
 #ifdef PLATFORM_WIN32
 #define WINVER 0x0500
 #define _WIN32_WINNT 0x0500
-#define THREADS_PER_CPU        25
-#else
-#define THREADS_PER_CPU        50
 #endif
 
+#define THREADS_PER_CPU        5 /* per-CPU factor in the worker limit: 20 + THREADS_PER_CPU * number of CPUs */
+
 #include <mono/metadata/domain-internals.h>
 #include <mono/metadata/tabledefs.h>
 #include <mono/metadata/threads.h>
 #include <mono/metadata/threads-types.h>
+#include <mono/metadata/threadpool-internals.h>
 #include <mono/metadata/exception.h>
 #include <mono/metadata/file-io.h>
 #include <mono/metadata/monitor.h>
+#include <mono/metadata/mono-mlist.h>
 #include <mono/metadata/marshal.h>
 #include <mono/metadata/socket-io.h>
 #include <mono/io-layer/io-layer.h>
 #include <mono/os/gc_wrapper.h>
 #include <errno.h>
+#ifdef HAVE_SYS_TIME_H
 #include <sys/time.h>
+#endif
 #include <sys/types.h>
 #include <fcntl.h>
+#ifdef HAVE_UNISTD_H
 #include <unistd.h>
+#endif
 #include <string.h>
 
 #include <mono/utils/mono-poll.h>
 
 #include "threadpool.h"
 
+/* Nonzero when thread 't' has a stop or suspend request set in its state
+ * flags. 't' is parenthesized so any pointer-valued expression can be passed. */
+#define THREAD_WANTS_A_BREAK(t) (((t)->state & (ThreadState_StopRequested | \
+                                               ThreadState_SuspendRequested)) != 0)
+
 #undef EPOLL_DEBUG
 
 /* maximum number of worker threads */
-static int mono_max_worker_threads = THREADS_PER_CPU;
-static int mono_min_worker_threads = 0;
-static int mono_io_max_worker_threads = THREADS_PER_CPU * 2;
+static int mono_max_worker_threads;
+static int mono_min_worker_threads;
+static int mono_io_max_worker_threads;
+static int mono_io_min_worker_threads;
 
 /* current number of worker threads */
 static int mono_worker_threads = 0;
@@ -76,7 +85,7 @@ typedef struct {
        CRITICAL_SECTION io_lock; /* access to sock_to_state */
        int inited;
        int pipe [2];
-       GHashTable *sock_to_state;
+       MonoGHashTable *sock_to_state;
 
        HANDLE new_sem; /* access to newpfd and write side of the pipe */
        mono_pollfd *newpfd;
@@ -92,26 +101,38 @@ static SocketIOData socket_io_data;
 static HANDLE job_added;
 static HANDLE io_job_added;
 
+/* Keep in sync with the System.MonoAsyncCall class which provides GC tracking */
 typedef struct {
+       MonoObject         object; /* managed object header: instances are created with mono_object_new () in mono_thread_pool_add */
        MonoMethodMessage *msg;
-       HANDLE             wait_event;
        MonoMethod        *cb_method;
        MonoDelegate      *cb_target;
        MonoObject        *state;
        MonoObject        *res;
        MonoArray         *out_args;
+       /* This is a HANDLE, we use guint64 so the managed object layout remains constant */
+       guint64           wait_event;
 } ASyncCall;
 
+typedef struct { /* job queue stored in a managed object array; filled by append_job () */
+       MonoArray *array; /* backing storage; NULL until the first append allocates it */
+       int first_elem; /* index of the first element still queued */
+       int next_elem; /* index where the next appended job is stored */
+} TPQueue;
+
 static void async_invoke_thread (gpointer data);
-static void append_job (CRITICAL_SECTION *cs, GList **plist, gpointer ar);
+static void append_job (CRITICAL_SECTION *cs, TPQueue *list, MonoObject *ar);
 static void start_thread_or_queue (MonoAsyncResult *ares);
 static void mono_async_invoke (MonoAsyncResult *ares);
-static gpointer dequeue_job (CRITICAL_SECTION *cs, GList **plist);
+static MonoObject* dequeue_job (CRITICAL_SECTION *cs, TPQueue *list);
+static void free_queue (TPQueue *list);
 
-static GList *async_call_queue = NULL;
-static GList *async_io_queue = NULL;
+static TPQueue async_call_queue = {NULL, 0, 0};
+static TPQueue async_io_queue = {NULL, 0, 0};
 
+static MonoClass *async_call_klass;
 static MonoClass *socket_async_call_klass;
+static MonoClass *process_async_call_klass;
 
 #define INIT_POLLFD(a, b, c) {(a)->fd = b; (a)->events = c; (a)->revents = 0;}
 enum {
@@ -122,6 +143,9 @@ enum {
        AIO_OP_RECEIVEFROM,
        AIO_OP_SEND,
        AIO_OP_SENDTO,
+       AIO_OP_RECV_JUST_CALLBACK,
+       AIO_OP_SEND_JUST_CALLBACK,
+       AIO_OP_READPIPE,
        AIO_OP_LAST
 };
 
@@ -147,10 +171,9 @@ socket_io_cleanup (SocketIOData *data)
        if (data->new_sem)
                CloseHandle (data->new_sem);
        data->new_sem = NULL;
-       g_hash_table_destroy (data->sock_to_state);
+       mono_g_hash_table_destroy (data->sock_to_state);
        data->sock_to_state = NULL;
-       g_list_free (async_io_queue);
-       async_io_queue = NULL;
+       free_queue (&async_io_queue);
        release = (gint) InterlockedCompareExchange (&io_worker_threads, 0, -1);
        if (io_job_added)
                ReleaseSemaphore (io_job_added, release, NULL);
@@ -169,9 +192,12 @@ get_event_from_state (MonoSocketAsyncResult *state)
        switch (state->operation) {
        case AIO_OP_ACCEPT:
        case AIO_OP_RECEIVE:
+       case AIO_OP_RECV_JUST_CALLBACK:
        case AIO_OP_RECEIVEFROM:
+       case AIO_OP_READPIPE:
                return MONO_POLLIN;
        case AIO_OP_SEND:
+       case AIO_OP_SEND_JUST_CALLBACK:
        case AIO_OP_SENDTO:
        case AIO_OP_CONNECT:
                return MONO_POLLOUT;
@@ -182,15 +208,14 @@ get_event_from_state (MonoSocketAsyncResult *state)
 }
 
 static int
-get_events_from_list (GSList *list)
+get_events_from_list (MonoMList *list)
 {
        MonoSocketAsyncResult *state;
        int events = 0;
 
-       while (list && list->data) {
-               state = (MonoSocketAsyncResult *) list->data;
+       while (list && (state = (MonoSocketAsyncResult *)mono_mlist_get_data (list))) {
                events |= get_event_from_state (state);
-               list = list->next;
+               list = mono_mlist_next (list);
        }
 
        return events;
@@ -211,7 +236,7 @@ async_invoke_io_thread (gpointer data)
        MonoThread *thread;
        thread = mono_thread_current ();
        thread->threadpool_thread = TRUE;
-       thread->state |= ThreadState_Background;
+       ves_icall_System_Threading_Thread_SetState (thread, ThreadState_Background);
 
        for (;;) {
                MonoSocketAsyncResult *state;
@@ -221,8 +246,6 @@ async_invoke_io_thread (gpointer data)
                if (state) {
                        InterlockedDecrement (&pending_io_items);
                        ar = state->ares;
-                       /* worker threads invokes methods in different domains,
-                        * so we need to set the right domain here */
                        switch (state->operation) {
                        case AIO_OP_RECEIVE:
                                state->total = ICALL_RECV (state);
@@ -232,12 +255,22 @@ async_invoke_io_thread (gpointer data)
                                break;
                        }
 
+                       /* worker threads invoke methods in different domains,
+                        * so we need to set the right domain here */
                        domain = ((MonoObject *)ar)->vtable->domain;
+                       mono_thread_push_appdomain_ref (domain);
                        if (mono_domain_set (domain, FALSE)) {
-                               mono_thread_push_appdomain_ref (domain);
+                               ASyncCall *ac;
+
                                mono_async_invoke (ar);
-                               mono_thread_pop_appdomain_ref ();
+                               ac = (ASyncCall *) ar->object_data;
+                               /*
+                               if (ac->msg->exc != NULL)
+                                       mono_unhandled_exception (ac->msg->exc);
+                               */
+                               mono_domain_set (mono_get_root_domain (), TRUE);
                        }
+                       mono_thread_pop_appdomain_ref ();
                        InterlockedDecrement (&busy_io_worker_threads);
                }
 
@@ -250,7 +283,7 @@ async_invoke_io_thread (gpointer data)
                        
                        do {
                                wr = WaitForSingleObjectEx (io_job_added, (guint32)timeout, TRUE);
-                               if ((thread->state & ThreadState_StopRequested)!=0)
+                               if (THREAD_WANTS_A_BREAK (thread))
                                        mono_thread_interruption_checkpoint ();
                        
                                timeout -= GetTickCount () - start_time;
@@ -291,32 +324,31 @@ start_io_thread_or_queue (MonoSocketAsyncResult *ares)
                InterlockedIncrement (&busy_io_worker_threads);
                InterlockedIncrement (&io_worker_threads);
                domain = ((ares) ? ((MonoObject *) ares)->vtable->domain : mono_domain_get ());
-               mono_thread_create (domain, async_invoke_io_thread, ares);
+               mono_thread_create (mono_get_root_domain (), async_invoke_io_thread, ares);
        } else {
-               append_job (&io_queue_lock, &async_io_queue, ares);
+               append_job (&io_queue_lock, &async_io_queue, (MonoObject*)ares);
                ReleaseSemaphore (io_job_added, 1, NULL);
        }
 }
 
-static GSList *
-process_io_event (GSList *list, int event)
+static MonoMList *
+process_io_event (MonoMList *list, int event)
 {
        MonoSocketAsyncResult *state;
-       GSList *oldlist;
+       MonoMList *oldlist;
 
        oldlist = list;
        state = NULL;
        while (list) {
-               state = (MonoSocketAsyncResult *) list->data;
+               state = (MonoSocketAsyncResult *) mono_mlist_get_data (list);
                if (get_event_from_state (state) == event)
                        break;
                
-               list = list->next;
+               list = mono_mlist_next (list);
        }
 
        if (list != NULL) {
-               oldlist = g_slist_remove_link (oldlist, list);
-               g_slist_free_1 (list);
+               oldlist = mono_mlist_remove_item (oldlist, list);
 #ifdef EPOLL_DEBUG
                g_print ("Dispatching event %d on socket %d\n", event, state->handle);
 #endif
@@ -339,7 +371,7 @@ mark_bad_fds (mono_pollfd *pfds, int nfds)
                if (pfd->fd == -1)
                        continue;
 
-               ret = mono_poll (pfds, 1, 0);
+               ret = mono_poll (pfd, 1, 0);
                if (ret == -1 && errno == EBADF) {
                        pfd->revents |= MONO_POLLNVAL;
                        count++;
@@ -365,7 +397,7 @@ socket_io_poll_main (gpointer p)
 
        thread = mono_thread_current ();
        thread->threadpool_thread = TRUE;
-       thread->state |= ThreadState_Background;
+       ves_icall_System_Threading_Thread_SetState (thread, ThreadState_Background);
 
        allocated = INITIAL_POLLFD_SIZE;
        pfds = g_new0 (mono_pollfd, allocated);
@@ -377,11 +409,11 @@ socket_io_poll_main (gpointer p)
                int nsock = 0;
                mono_pollfd *pfd;
                char one [1];
-               GSList *list;
+               MonoMList *list;
 
                do {
                        if (nsock == -1) {
-                               if ((thread->state & ThreadState_StopRequested) != 0)
+                               if (THREAD_WANTS_A_BREAK (thread))
                                        mono_thread_interruption_checkpoint ();
                        }
 
@@ -458,6 +490,7 @@ socket_io_poll_main (gpointer p)
                EnterCriticalSection (&data->io_lock);
                if (data->inited == 0) {
                        g_free (pfds);
+                       LeaveCriticalSection (&data->io_lock);
                        return; /* cleanup called */
                }
 
@@ -467,7 +500,7 @@ socket_io_poll_main (gpointer p)
                                continue;
 
                        nsock--;
-                       list = g_hash_table_lookup (data->sock_to_state, GINT_TO_POINTER (pfd->fd));
+                       list = mono_g_hash_table_lookup (data->sock_to_state, GINT_TO_POINTER (pfd->fd));
                        if (list != NULL && (pfd->revents & (MONO_POLLIN | POLL_ERRORS)) != 0) {
                                list = process_io_event (list, MONO_POLLIN);
                        }
@@ -477,10 +510,10 @@ socket_io_poll_main (gpointer p)
                        }
 
                        if (list != NULL) {
-                               g_hash_table_replace (data->sock_to_state, GINT_TO_POINTER (pfd->fd), list);
+                               mono_g_hash_table_replace (data->sock_to_state, GINT_TO_POINTER (pfd->fd), list);
                                pfd->events = get_events_from_list (list);
                        } else {
-                               g_hash_table_remove (data->sock_to_state, GINT_TO_POINTER (pfd->fd));
+                               mono_g_hash_table_remove (data->sock_to_state, GINT_TO_POINTER (pfd->fd));
                                pfd->fd = -1;
                                if (i == maxfd - 1)
                                        maxfd--;
@@ -506,18 +539,14 @@ socket_io_epoll_main (gpointer p)
        epollfd = data->epollfd;
        thread = mono_thread_current ();
        thread->threadpool_thread = TRUE;
-       thread->state |= ThreadState_Background;
+       ves_icall_System_Threading_Thread_SetState (thread, ThreadState_Background);
        events = g_new0 (struct epoll_event, nevents);
 
        while (1) {
                do {
                        if (ready == -1) {
-                               if ((thread->state & ThreadState_StopRequested) != 0) {
-                                       g_free (events);
-                                       close (epollfd);
+                               if (THREAD_WANTS_A_BREAK (thread))
                                        mono_thread_interruption_checkpoint ();
-                                       g_assert_not_reached ();
-                               }
                        }
 #ifdef EPOLL_DEBUG
                        g_print ("epoll_wait init\n");
@@ -554,13 +583,13 @@ socket_io_epoll_main (gpointer p)
 
                for (i = 0; i < ready; i++) {
                        int fd;
-                       GSList *list;
+                       MonoMList *list;
 
                        evt = &events [i];
                        fd = evt->data.fd;
-                       list = g_hash_table_lookup (data->sock_to_state, GINT_TO_POINTER (fd));
+                       list = mono_g_hash_table_lookup (data->sock_to_state, GINT_TO_POINTER (fd));
 #ifdef EPOLL_DEBUG
-                       g_print ("Event %d on %d list length: %d\n", evt->events, fd, g_slist_length (list));
+                       g_print ("Event %d on %d list length: %d\n", evt->events, fd, mono_mlist_length (list));
 #endif
                        if (list != NULL && (evt->events & (EPOLLIN | EPOLL_ERRORS)) != 0) {
                                list = process_io_event (list, MONO_POLLIN);
@@ -571,7 +600,7 @@ socket_io_epoll_main (gpointer p)
                        }
 
                        if (list != NULL) {
-                               g_hash_table_replace (data->sock_to_state, GINT_TO_POINTER (fd), list);
+                               mono_g_hash_table_replace (data->sock_to_state, GINT_TO_POINTER (fd), list);
                                evt->events = get_events_from_list (list);
 #ifdef EPOLL_DEBUG
                                g_print ("MOD %d to %d\n", fd, evt->events);
@@ -586,7 +615,7 @@ socket_io_epoll_main (gpointer p)
                                        }
                                }
                        } else {
-                               g_hash_table_remove (data->sock_to_state, GINT_TO_POINTER (fd));
+                               mono_g_hash_table_remove (data->sock_to_state, GINT_TO_POINTER (fd));
 #ifdef EPOLL_DEBUG
                                g_print ("DEL %d\n", fd);
 #endif
@@ -598,6 +627,44 @@ socket_io_epoll_main (gpointer p)
 }
 #endif
 
+/*
+ * Detach and dispatch the states queued for 'sock'. select/poll wake up when a socket
+ * is closed, but epoll just removes it from its internal list without notification.
+ */
+void
+mono_thread_pool_remove_socket (int sock)
+{
+#ifdef HAVE_EPOLL /* the whole body is compiled out without epoll support */
+       MonoMList *list, *next;
+       MonoSocketAsyncResult *state;
+
+       if (socket_io_data.epoll_disabled == TRUE || socket_io_data.inited == FALSE) /* epoll backend not in use or not set up */
+               return;
+
+       EnterCriticalSection (&socket_io_data.io_lock); /* io_lock guards access to sock_to_state */
+       list = mono_g_hash_table_lookup (socket_io_data.sock_to_state, GINT_TO_POINTER (sock));
+       if (list) {
+               mono_g_hash_table_remove (socket_io_data.sock_to_state, GINT_TO_POINTER (sock)); /* detach the list from the table */
+       }
+       LeaveCriticalSection (&socket_io_data.io_lock);
+       
+       while (list) { /* runs outside the lock, on the detached list */
+               state = (MonoSocketAsyncResult *) mono_mlist_get_data (list);
+               if (state->operation == AIO_OP_RECEIVE) /* NOTE(review): *_JUST_CALLBACK presumably makes the worker skip the recv/send itself -- confirm in async_invoke_io_thread */
+                       state->operation = AIO_OP_RECV_JUST_CALLBACK;
+               else if (state->operation == AIO_OP_SEND)
+                       state->operation = AIO_OP_SEND_JUST_CALLBACK;
+
+               next = mono_mlist_remove_item (list, list); /* unlink the head; 'next' is presumably the remaining list -- confirm against mono-mlist */
+               list = process_io_event (list, MONO_POLLIN); /* try the unlinked head as a MONO_POLLIN operation */
+               if (list) /* still present, so it was not a MONO_POLLIN operation */
+                       process_io_event (list, MONO_POLLOUT);
+
+               list = next;
+       }
+#endif
+}
+
 #ifdef PLATFORM_WIN32
 static void
 connect_hack (gpointer x)
@@ -685,12 +752,18 @@ socket_io_init (SocketIOData *data)
        g_assert (data->pipe [0] != INVALID_SOCKET);
        closesocket (srv);
 #endif
+       mono_io_max_worker_threads = mono_max_worker_threads / 2;
+       if (mono_io_max_worker_threads < 10)
+               mono_io_max_worker_threads = 10;
 
-       data->sock_to_state = g_hash_table_new (g_direct_hash, g_direct_equal);
+       data->sock_to_state = mono_g_hash_table_new_type (g_direct_hash, g_direct_equal, MONO_HASH_VALUE_GC);
 
-       if (data->epoll_disabled)
+       if (data->epoll_disabled) {
                data->new_sem = CreateSemaphore (NULL, 1, 1, NULL);
+               g_assert (data->new_sem != NULL);
+       }
        io_job_added = CreateSemaphore (NULL, 0, 0x7fffffff, NULL);
+       g_assert (io_job_added != NULL);
        InitializeCriticalSection (&io_queue_lock);
        if (data->epoll_disabled) {
                mono_thread_create (mono_get_root_domain (), socket_io_poll_main, data);
@@ -709,25 +782,34 @@ socket_io_add_poll (MonoSocketAsyncResult *state)
 {
        int events;
        char msg [1];
-       GSList *list;
+       MonoMList *list;
        SocketIOData *data = &socket_io_data;
 
+#if defined(PLATFORM_MACOSX) || defined(PLATFORM_BSD6) || defined(PLATFORM_WIN32)
+       /* select() for connect() does not work well on the Mac. Bug #75436. */
+       /* Bug #77637 for the BSD 6 case */
+       /* Bug #78888 for the Windows case */
+       if (state->operation == AIO_OP_CONNECT && state->blocking == TRUE) {
+               start_io_thread_or_queue (state);
+               return;
+       }
+#endif
        WaitForSingleObject (data->new_sem, INFINITE);
        if (data->newpfd == NULL)
                data->newpfd = g_new0 (mono_pollfd, 1);
 
        EnterCriticalSection (&data->io_lock);
-       list = g_hash_table_lookup (data->sock_to_state, GINT_TO_POINTER (state->handle));
+       /* FIXME: 64 bit issue: handle can be a pointer on windows? */
+       list = mono_g_hash_table_lookup (data->sock_to_state, GINT_TO_POINTER (state->handle));
        if (list == NULL) {
-               list = g_slist_alloc ();
-               list->data = state;
+               list = mono_mlist_alloc ((MonoObject*)state);
        } else {
-               list = g_slist_append (list, state);
+               list = mono_mlist_append (list, (MonoObject*)state);
        }
 
        events = get_events_from_list (list);
        INIT_POLLFD (data->newpfd, GPOINTER_TO_INT (state->handle), events);
-       g_hash_table_replace (data->sock_to_state, GINT_TO_POINTER (state->handle), list);
+       mono_g_hash_table_replace (data->sock_to_state, GINT_TO_POINTER (state->handle), list);
        LeaveCriticalSection (&data->io_lock);
        *msg = (char) state->operation;
 #ifndef PLATFORM_WIN32
@@ -741,7 +823,7 @@ socket_io_add_poll (MonoSocketAsyncResult *state)
 static gboolean
 socket_io_add_epoll (MonoSocketAsyncResult *state)
 {
-       GSList *list;
+       MonoMList *list;
        SocketIOData *data = &socket_io_data;
        struct epoll_event event;
        int epoll_op, ievt;
@@ -750,13 +832,12 @@ socket_io_add_epoll (MonoSocketAsyncResult *state)
        memset (&event, 0, sizeof (struct epoll_event));
        fd = GPOINTER_TO_INT (state->handle);
        EnterCriticalSection (&data->io_lock);
-       list = g_hash_table_lookup (data->sock_to_state, GINT_TO_POINTER (fd));
+       list = mono_g_hash_table_lookup (data->sock_to_state, GINT_TO_POINTER (fd));
        if (list == NULL) {
-               list = g_slist_alloc ();
-               list->data = state;
+               list = mono_mlist_alloc ((MonoObject*)state);
                epoll_op = EPOLL_CTL_ADD;
        } else {
-               list = g_slist_append (list, state);
+               list = mono_mlist_append (list, (MonoObject*)state);
                epoll_op = EPOLL_CTL_MOD;
        }
 
@@ -766,7 +847,7 @@ socket_io_add_epoll (MonoSocketAsyncResult *state)
        if ((ievt & MONO_POLLOUT) != 0)
                event.events |= EPOLLOUT;
 
-       g_hash_table_replace (data->sock_to_state, state->handle, list);
+       mono_g_hash_table_replace (data->sock_to_state, state->handle, list);
        event.data.fd = fd;
 #ifdef EPOLL_DEBUG
        g_print ("%s %d with %d\n", epoll_op == EPOLL_CTL_ADD ? "ADD" : "MOD", fd, event.events);
@@ -790,7 +871,7 @@ static void
 socket_io_add (MonoAsyncResult *ares, MonoSocketAsyncResult *state)
 {
        socket_io_init (&socket_io_data);
-       state->ares = ares;
+       MONO_OBJECT_SETREF (state, ares, ares);
 #ifdef HAVE_EPOLL
        if (socket_io_data.epoll_disabled == FALSE) {
                if (socket_io_add_epoll (state))
@@ -810,24 +891,31 @@ socket_io_filter (MonoObject *target, MonoObject *state)
        if (target == NULL || state == NULL)
                return FALSE;
 
-       klass = InterlockedCompareExchangePointer ((gpointer *) &socket_async_call_klass, NULL, NULL);
-       if (klass == NULL) {
-               MonoImage *system_assembly = mono_image_loaded ("System");
-
-               if (system_assembly == NULL)
-                       return FALSE;
-
-               klass = mono_class_from_name (system_assembly, "System.Net.Sockets", "Socket/SocketAsyncCall");
-               if (klass == NULL) {
-                       /* Should never happen... */
-                       g_print ("socket_io_filter: SocketAsyncCall class not found.\n");
-                       return FALSE;
-               }
-
-               InterlockedCompareExchangePointer ((gpointer *) &socket_async_call_klass, klass, NULL);
+       if (socket_async_call_klass == NULL) {
+               klass = target->vtable->klass;
+               /* Check if it's SocketAsyncCall in System.Net.Sockets
+                * FIXME: check the assembly is signed correctly for extra care
+                */
+               if (klass->name [0] == 'S' && strcmp (klass->name, "SocketAsyncCall") == 0 
+                               && strcmp (mono_image_get_name (klass->image), "System") == 0
+                               && klass->nested_in && strcmp (klass->nested_in->name, "Socket") == 0)
+                       socket_async_call_klass = klass;
        }
 
-       if (target->vtable->klass != klass)
+       if (process_async_call_klass == NULL) {
+               klass = target->vtable->klass;
+               /* Check if it's AsyncReadHandler in System.Diagnostics.Process
+                * FIXME: check the assembly is signed correctly for extra care
+                */
+               if (klass->name [0] == 'A' && strcmp (klass->name, "AsyncReadHandler") == 0 
+                               && strcmp (mono_image_get_name (klass->image), "System") == 0
+                               && klass->nested_in && strcmp (klass->nested_in->name, "Process") == 0)
+                       process_async_call_klass = klass;
+       }
+       /* Return FALSE both when neither async-call class has been seen yet and when
+        * the target is not an instance of either class.
+        */
+       if (target->vtable->klass != socket_async_call_klass && target->vtable->klass != process_async_call_klass)
                return FALSE;
 
        op = sock_res->operation;
@@ -840,11 +928,25 @@ socket_io_filter (MonoObject *target, MonoObject *state)
 static void
 mono_async_invoke (MonoAsyncResult *ares)
 {
-       ASyncCall *ac = (ASyncCall *)ares->data;
+       ASyncCall *ac = (ASyncCall *)ares->object_data;
+       MonoThread *thread = NULL;
+       MonoObject *res, *exc = NULL;
+       MonoArray *out_args = NULL;
+
+       if (ares->execution_context) {
+               /* use captured ExecutionContext (if available) */
+               thread = mono_thread_current ();
+               MONO_OBJECT_SETREF (ares, original_context, thread->execution_context);
+               MONO_OBJECT_SETREF (thread, execution_context, ares->execution_context);
+       } else {
+               ares->original_context = NULL;
+       }
 
        ac->msg->exc = NULL;
-       ac->res = mono_message_invoke (ares->async_delegate, ac->msg, 
-                                      &ac->msg->exc, &ac->out_args);
+       res = mono_message_invoke (ares->async_delegate, ac->msg, &exc, &out_args);
+       MONO_OBJECT_SETREF (ac, res, res);
+       MONO_OBJECT_SETREF (ac, msg->exc, exc);
+       MONO_OBJECT_SETREF (ac, out_args, out_args);
 
        ares->completed = 1;
 
@@ -853,15 +955,23 @@ mono_async_invoke (MonoAsyncResult *ares)
                MonoObject *exc = NULL;
                void *pa = &ares;
                mono_runtime_invoke (ac->cb_method, ac->cb_target, pa, &exc);
-               if (!ac->msg->exc)
-                       ac->msg->exc = exc;
+               /* 'exc' will be the previous ac->msg->exc if not NULL and not
+                * caught. If caught, this will be set to NULL and the
+                * exception will not be printed. */
+               MONO_OBJECT_SETREF (ac->msg, exc, exc);
+       }
+
+       /* restore original thread execution context if flow isn't suppressed, i.e. non null */
+       if (ares->original_context) {
+               MONO_OBJECT_SETREF (thread, execution_context, ares->original_context);
+               ares->original_context = NULL;
        }
 
        /* notify listeners */
        mono_monitor_enter ((MonoObject *) ares);
        if (ares->handle != NULL) {
-               ac->wait_event = ((MonoWaitHandle *) ares->handle)->handle;
-               SetEvent (ac->wait_event);
+               ac->wait_event = (gsize) mono_wait_handle_get_handle ((MonoWaitHandle *) ares->handle);
+               SetEvent ((gpointer)(gsize)ac->wait_event);
        }
        mono_monitor_exit ((MonoObject *) ares);
 
@@ -880,18 +990,23 @@ mono_thread_pool_init ()
                return;
 
        MONO_GC_REGISTER_ROOT (ares_htable);
+       MONO_GC_REGISTER_ROOT (socket_io_data.sock_to_state);
        InitializeCriticalSection (&socket_io_data.io_lock);
        InitializeCriticalSection (&ares_lock);
-       ares_htable = mono_g_hash_table_new (NULL, NULL);
+       ares_htable = mono_g_hash_table_new_type (NULL, NULL, MONO_HASH_KEY_VALUE_GC);
        job_added = CreateSemaphore (NULL, 0, 0x7fffffff, NULL);
+       g_assert (job_added != NULL);
        GetSystemInfo (&info);
-       if (getenv ("MONO_THREADS_PER_CPU") != NULL) {
-               threads_per_cpu = atoi (getenv ("MONO_THREADS_PER_CPU"));
+       if (g_getenv ("MONO_THREADS_PER_CPU") != NULL) {
+               threads_per_cpu = atoi (g_getenv ("MONO_THREADS_PER_CPU"));
                if (threads_per_cpu <= 0)
                        threads_per_cpu = THREADS_PER_CPU;
        }
 
-       mono_max_worker_threads = threads_per_cpu * info.dwNumberOfProcessors;
+       mono_max_worker_threads = 20 + threads_per_cpu * info.dwNumberOfProcessors;
+
+       async_call_klass = mono_class_from_name (mono_defaults.corlib, "System", "MonoAsyncCall");
+       g_assert (async_call_klass);
 }
 
 MonoAsyncResult *
@@ -902,23 +1017,17 @@ mono_thread_pool_add (MonoObject *target, MonoMethodMessage *msg, MonoDelegate *
        MonoAsyncResult *ares;
        ASyncCall *ac;
 
-#ifdef HAVE_BOEHM_GC
-       ac = GC_MALLOC (sizeof (ASyncCall));
-#else
-       /* We'll leak the event if creaated... */
-       ac = g_new0 (ASyncCall, 1);
-#endif
-       ac->wait_event = NULL;
-       ac->msg = msg;
-       ac->state = state;
+       ac = (ASyncCall*)mono_object_new (mono_domain_get (), async_call_klass);
+       MONO_OBJECT_SETREF (ac, msg, msg);
+       MONO_OBJECT_SETREF (ac, state, state);
 
        if (async_callback) {
                ac->cb_method = mono_get_delegate_invoke (((MonoObject *)async_callback)->vtable->klass);
-               ac->cb_target = async_callback;
+               MONO_OBJECT_SETREF (ac, cb_target, async_callback);
        }
 
-       ares = mono_async_result_new (domain, NULL, ac->state, ac);
-       ares->async_delegate = target;
+       ares = mono_async_result_new (domain, NULL, ac->state, NULL, (MonoObject*)ac);
+       MONO_OBJECT_SETREF (ares, async_delegate, target);
 
        EnterCriticalSection (&ares_lock);
        mono_g_hash_table_insert (ares_htable, ares, ares);
@@ -937,7 +1046,6 @@ static void
 start_thread_or_queue (MonoAsyncResult *ares)
 {
        int busy, worker;
-       MonoDomain *domain;
 
        busy = (int) InterlockedCompareExchange (&busy_worker_threads, 0, -1);
        worker = (int) InterlockedCompareExchange (&mono_worker_threads, 0, -1); 
@@ -945,10 +1053,9 @@ start_thread_or_queue (MonoAsyncResult *ares)
            worker < mono_max_worker_threads) {
                InterlockedIncrement (&mono_worker_threads);
                InterlockedIncrement (&busy_worker_threads);
-               domain = ((MonoObject *) ares)->vtable->domain;
-               mono_thread_create (domain, async_invoke_thread, ares);
+               mono_thread_create (mono_get_root_domain (), async_invoke_thread, ares);
        } else {
-               append_job (&mono_delegate_section, &async_call_queue, ares);
+               append_job (&mono_delegate_section, &async_call_queue, (MonoObject*)ares);
                ReleaseSemaphore (job_added, 1, NULL);
        }
 }
@@ -972,23 +1079,24 @@ mono_thread_pool_finish (MonoAsyncResult *ares, MonoArray **out_args, MonoObject
        }
 
        ares->endinvoke_called = 1;
-       ac = (ASyncCall *)ares->data;
+       ac = (ASyncCall *)ares->object_data;
 
        g_assert (ac != NULL);
 
        /* wait until we are really finished */
        if (!ares->completed) {
                if (ares->handle == NULL) {
-                       ac->wait_event = CreateEvent (NULL, TRUE, FALSE, NULL);
-                       ares->handle = (MonoObject *) mono_wait_handle_new (mono_object_domain (ares), ac->wait_event);
+                       ac->wait_event = (gsize)CreateEvent (NULL, TRUE, FALSE, NULL);
+                       g_assert(ac->wait_event != 0);
+                       MONO_OBJECT_SETREF (ares, handle, (MonoObject *) mono_wait_handle_new (mono_object_domain (ares), (gpointer)(gsize)ac->wait_event));
                }
                mono_monitor_exit ((MonoObject *) ares);
-               WaitForSingleObjectEx (ac->wait_event, INFINITE, TRUE);
+               WaitForSingleObjectEx ((gpointer)(gsize)ac->wait_event, INFINITE, TRUE);
        } else {
                mono_monitor_exit ((MonoObject *) ares);
        }
 
-       *exc = ac->msg->exc;
+       *exc = ac->msg->exc; /* FIXME: GC add write barrier */
        *out_args = ac->out_args;
 
        return ac->res;
@@ -1000,8 +1108,7 @@ mono_thread_pool_cleanup (void)
        gint release;
 
        EnterCriticalSection (&mono_delegate_section);
-       g_list_free (async_call_queue);
-       async_call_queue = NULL;
+       free_queue (&async_call_queue);
        release = (gint) InterlockedCompareExchange (&mono_worker_threads, 0, -1);
        LeaveCriticalSection (&mono_delegate_section);
        if (job_added)
@@ -1011,53 +1118,107 @@ mono_thread_pool_cleanup (void)
 }
 
+/*
+ * append_job:
+ * @cs: critical section guarding @list; entered and left inside this call
+ * @list: queue to append to
+ * @ar: object to enqueue
+ *
+ * Stores @ar at list->next_elem.  The backing array is created with 16
+ * slots on first use.  When there is no free slot left at the tail, the
+ * pending entries are slid down to index 0 if first_elem is non-zero;
+ * otherwise they are copied into a new array twice as long.
+ */
 static void
-append_job (CRITICAL_SECTION *cs, GList **plist, gpointer ar)
+append_job (CRITICAL_SECTION *cs, TPQueue *list, MonoObject *ar)
 {
-       GList *tmp, *list;
-
        EnterCriticalSection (cs);
-       list = *plist;
-       if (list == NULL) {
-               list = g_list_append (list, ar); 
+       /* fast path: there is still a free slot at the tail */
+       if (list->array && (list->next_elem < mono_array_length (list->array))) {
+               mono_array_setref (list->array, list->next_elem, ar);
+               list->next_elem++;
+               LeaveCriticalSection (cs);
+               return;
+       }
+       if (!list->array) {
+               MONO_GC_REGISTER_ROOT (list->array);
+               list->array = mono_array_new (mono_get_root_domain (), mono_defaults.object_class, 16);
        } else {
-               for (tmp = list; tmp && tmp->data != NULL; tmp = tmp->next);
-               if (tmp == NULL) {
-                       list = g_list_append (list, ar); 
+               int count = list->next_elem - list->first_elem;
+               /* slide the array or create a larger one if it's full */
+               if (list->first_elem) {
+                       int i;
+
+                       mono_array_memcpy_refs (list->array, 0, list->array, list->first_elem, count);
+                       /* the slots from count upward still hold the old references; clear them */
+                       for (i = count; i < list->next_elem; i++)
+                               mono_array_setref (list->array, i, NULL);
                } else {
-                       tmp->data = ar;
+                       MonoArray *newa = mono_array_new (mono_get_root_domain (), mono_defaults.object_class, mono_array_length (list->array) * 2);
+                       mono_array_memcpy_refs (newa, 0, list->array, list->first_elem, count);
+                       list->array = newa;
                }
+               list->first_elem = 0;
+               list->next_elem = count;
        }
-       *plist = list;
+       mono_array_setref (list->array, list->next_elem, ar);
+       list->next_elem++;
        LeaveCriticalSection (cs);
 }
 
-static gpointer
-dequeue_job (CRITICAL_SECTION *cs, GList **plist)
+/*
+ * dequeue_job:
+ * @cs: critical section guarding @list; entered and left inside this call
+ * @list: queue to take from
+ *
+ * Returns the oldest pending object, or NULL when the queue has no array
+ * yet or no pending entries.  The vacated slot is set to NULL so that the
+ * array stops referencing an object that has been handed out.  An array
+ * longer than 16 slots is replaced by one half as long when fewer than a
+ * third of its slots are still pending.
+ */
+static MonoObject*
+dequeue_job (CRITICAL_SECTION *cs, TPQueue *list)
 {
-       gpointer ar = NULL;
-       GList *tmp, *tmp2, *list;
+       MonoObject *ar;
+       int count;
 
        EnterCriticalSection (cs);
-       list = *plist;
-       tmp = list;
-       if (tmp) {
-               ar = tmp->data;
-               tmp->data = NULL;
-               tmp2 = tmp;
-               for (tmp2 = tmp; tmp2->next != NULL; tmp2 = tmp2->next);
-               if (tmp2 != tmp) {
-                       list = tmp->next;
-                       tmp->next = NULL;
-                       tmp2->next = tmp;
-                       tmp->prev = tmp2;
-               }
+       if (!list->array || list->first_elem == list->next_elem) {
+               LeaveCriticalSection (cs);
+               return NULL;
+       }
+       ar = mono_array_get (list->array, MonoObject*, list->first_elem);
+       /* clear the vacated slot; otherwise the array keeps the job alive until the slot is reused */
+       mono_array_setref (list->array, list->first_elem, NULL);
+       list->first_elem++;
+       count = list->next_elem - list->first_elem;
+       /* reduce the size of the array if it's mostly empty */
+       if (mono_array_length (list->array) > 16 && count < (mono_array_length (list->array) / 3)) {
+               MonoArray *newa = mono_array_new (mono_get_root_domain (), mono_defaults.object_class, mono_array_length (list->array) / 2);
+               mono_array_memcpy_refs (newa, 0, list->array, list->first_elem, count);
+               list->array = newa;
+               list->first_elem = 0;
+               list->next_elem = count;
        }
-       *plist = list;
        LeaveCriticalSection (cs);
 
        return ar;
 }
 
+/*
+ * free_queue:
+ * @list: queue to reset
+ *
+ * Drops the queue's reference to its array and resets both indexes to 0.
+ * No locking is done here; that is left to the caller.
+ */
+static void
+free_queue (TPQueue *list)
+{
+       list->array = NULL;
+       list->first_elem = list->next_elem = 0;
+}
+
 static void
 async_invoke_thread (gpointer data)
 {
@@ -1067,7 +1191,7 @@ async_invoke_thread (gpointer data)
  
        thread = mono_thread_current ();
        thread->threadpool_thread = TRUE;
-       thread->state |= ThreadState_Background;
+       ves_icall_System_Threading_Thread_SetState (thread, ThreadState_Background);
 
        for (;;) {
                MonoAsyncResult *ar;
@@ -1077,16 +1201,24 @@ async_invoke_thread (gpointer data)
                        /* worker threads invokes methods in different domains,
                         * so we need to set the right domain here */
                        domain = ((MonoObject *)ar)->vtable->domain;
+                       mono_thread_push_appdomain_ref (domain);
                        if (mono_domain_set (domain, FALSE)) {
-                               mono_thread_push_appdomain_ref (domain);
+                               ASyncCall *ac;
+
                                mono_async_invoke (ar);
-                               mono_thread_pop_appdomain_ref ();
+                               ac = (ASyncCall *) ar->object_data;
+                               /*
+                               if (ac->msg->exc != NULL)
+                                       mono_unhandled_exception (ac->msg->exc);
+                               */
+                               mono_domain_set (mono_get_root_domain (), TRUE);
                        }
+                       mono_thread_pop_appdomain_ref ();
                        InterlockedDecrement (&busy_worker_threads);
                }
 
                data = dequeue_job (&mono_delegate_section, &async_call_queue);
-       
+
                if (!data) {
                        guint32 wr;
                        int timeout = 10000;
@@ -1094,7 +1226,7 @@ async_invoke_thread (gpointer data)
                        
                        do {
                                wr = WaitForSingleObjectEx (job_added, (guint32)timeout, TRUE);
-                               if ((thread->state & ThreadState_StopRequested)!=0)
+                               if (THREAD_WANTS_A_BREAK (thread))
                                        mono_thread_interruption_checkpoint ();
                        
                                timeout -= GetTickCount () - start_time;
@@ -1111,7 +1243,7 @@ async_invoke_thread (gpointer data)
        
                        while (!data && workers <= min) {
                                WaitForSingleObjectEx (job_added, INFINITE, TRUE);
-                               if ((thread->state & ThreadState_StopRequested)!=0)
+                               if (THREAD_WANTS_A_BREAK (thread))
                                        mono_thread_interruption_checkpoint ();
                        
                                data = dequeue_job (&mono_delegate_section, &async_call_queue);
@@ -1134,13 +1266,14 @@ async_invoke_thread (gpointer data)
 void
 ves_icall_System_Threading_ThreadPool_GetAvailableThreads (gint *workerThreads, gint *completionPortThreads)
 {
-       gint busy;
+       gint busy, busy_io; /* busy counts of the worker pool and the IO pool; each output below is "configured maximum minus busy" */
 
        MONO_ARCH_SAVE_REGS;
 
        busy = (gint) InterlockedCompareExchange (&busy_worker_threads, 0, -1);
+       busy_io = (gint) InterlockedCompareExchange (&busy_io_worker_threads, 0, -1); /* yields the counter's current value; 0 is stored only if the counter equals -1 */
        *workerThreads = mono_max_worker_threads - busy;
-       *completionPortThreads = 0;
+       *completionPortThreads = mono_io_max_worker_threads - busy_io; /* was hard-coded to 0 before this change */
 }
 
 void
@@ -1149,19 +1282,21 @@ ves_icall_System_Threading_ThreadPool_GetMaxThreads (gint *workerThreads, gint *
        MONO_ARCH_SAVE_REGS;
 
        *workerThreads = mono_max_worker_threads;
-       *completionPortThreads = 0;
+       *completionPortThreads = mono_io_max_worker_threads;
 }
 
 void
 ves_icall_System_Threading_ThreadPool_GetMinThreads (gint *workerThreads, gint *completionPortThreads)
 {
-       gint workers;
+       gint workers, workers_io; /* current minimum of the worker pool and of the IO pool, reported through the two out parameters */
 
        MONO_ARCH_SAVE_REGS;
 
        workers = (gint) InterlockedCompareExchange (&mono_min_worker_threads, 0, -1);
+       workers_io = (gint) InterlockedCompareExchange (&mono_io_min_worker_threads, 0, -1); /* yields the current value; 0 is stored only if the value equals -1 */
+
        *workerThreads = workers;
-       *completionPortThreads = 0;
+       *completionPortThreads = workers_io; /* was hard-coded to 0 before this change */
 }
 
 MonoBoolean
@@ -1171,8 +1306,28 @@ ves_icall_System_Threading_ThreadPool_SetMinThreads (gint workerThreads, gint co
 
        if (workerThreads < 0 || workerThreads > mono_max_worker_threads)
                return FALSE;
+
+       if (completionPortThreads < 0 || completionPortThreads > mono_io_max_worker_threads)
+               return FALSE;
+
        InterlockedExchange (&mono_min_worker_threads, workerThreads);
+       InterlockedExchange (&mono_io_min_worker_threads, completionPortThreads);
        /* FIXME: should actually start the idle threads if needed */
        return TRUE;
 }
 
+MonoBoolean /* TRUE when both new maxima were stored, FALSE when the request was rejected and nothing was changed */
+ves_icall_System_Threading_ThreadPool_SetMaxThreads (gint workerThreads, gint completionPortThreads)
+{
+       MONO_ARCH_SAVE_REGS;
+
+       if (workerThreads < mono_max_worker_threads) /* a maximum can only be kept or raised; lower values are rejected */
+               return FALSE;
+
+       if (completionPortThreads < mono_io_max_worker_threads) /* same rule for the IO pool; both checks run before either value is stored */
+               return FALSE;
+
+       InterlockedExchange (&mono_max_worker_threads, workerThreads);
+       InterlockedExchange (&mono_io_max_worker_threads, completionPortThreads); /* the two limits are stored one after the other, not as a single atomic update */
+       return TRUE;
+}