Merge pull request #5714 from alexischr/update_bockbuild
[mono.git] / mono / metadata / threadpool.c
index dadaa5088722ddb1d253e3505a7e969d94b83bc3..a1424beaebaea483723c7cda7050bed4182730f6 100644 (file)
-/*
- * threadpool.c: global thread pool
+/**
+ * \file
+ * Microsoft threadpool runtime support
  *
- * Authors:
- *   Dietmar Maurer (dietmar@ximian.com)
- *   Gonzalo Paniagua Javier (gonzalo@ximian.com)
+ * Author:
+ *     Ludovic Henry (ludovic.henry@xamarin.com)
  *
- * Copyright 2001-2003 Ximian, Inc (http://www.ximian.com)
- * Copyright 2004-2010 Novell, Inc (http://www.novell.com)
- * Copyright 2001 Xamarin Inc (http://www.xamarin.com)
+ * Copyright 2015 Xamarin, Inc (http://www.xamarin.com)
+ * Licensed under the MIT license. See LICENSE file in the project root for full license information.
  */
 
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+// Files:
+//  - src/vm/comthreadpool.cpp
+//  - src/vm/win32threadpool.cpp
+//  - src/vm/threadpoolrequest.cpp
+//  - src/vm/hillclimbing.cpp
+//
+// Ported from C++ to C and adjusted to Mono runtime
+
+#include <stdlib.h>
+#define _USE_MATH_DEFINES // needed by MSVC to define math constants
+#include <math.h>
 #include <config.h>
 #include <glib.h>
 
-#include <mono/metadata/profiler-private.h>
-#include <mono/metadata/threads.h>
-#include <mono/metadata/threads-types.h>
-#include <mono/metadata/threadpool-internals.h>
+#include <mono/metadata/class-internals.h>
 #include <mono/metadata/exception.h>
-#include <mono/metadata/environment.h>
-#include <mono/metadata/mono-config.h>
-#include <mono/metadata/mono-mlist.h>
-#include <mono/metadata/mono-perfcounters.h>
-#include <mono/metadata/socket-io.h>
-#include <mono/metadata/mono-cq.h>
-#include <mono/metadata/mono-wsq.h>
-#include <mono/metadata/mono-ptr-array.h>
+#include <mono/metadata/gc-internals.h>
+#include <mono/metadata/object.h>
 #include <mono/metadata/object-internals.h>
-#include <mono/io-layer/io-layer.h>
-#include <mono/utils/mono-time.h>
-#include <mono/utils/mono-proclib.h>
-#include <mono/utils/mono-semaphore.h>
+#include <mono/metadata/threadpool.h>
+#include <mono/metadata/threadpool-worker.h>
+#include <mono/metadata/threadpool-io.h>
+#include <mono/metadata/w32event.h>
 #include <mono/utils/atomic.h>
-#include <errno.h>
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-#include <sys/types.h>
-#include <fcntl.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#include <string.h>
-#include <math.h>
-#ifdef HAVE_SYS_SOCKET_H
-#include <sys/socket.h>
-#endif
-#include <mono/utils/mono-poll.h>
-#ifdef HAVE_EPOLL
-#include <sys/epoll.h>
-#endif
-#ifdef HAVE_KQUEUE
-#include <sys/event.h>
-#endif
-
-
-#ifndef DISABLE_SOCKETS
-#include "mono/io-layer/socket-wrappers.h"
-#endif
-
-#include "threadpool.h"
-#include "threadpool-ms.h"
-#include "threadpool-ms-io.h"
-
-static gboolean
-use_ms_threadpool (void)
-{
-       static gboolean use_ms_tp = -1;
-       const gchar *mono_threadpool_env;
-       if (use_ms_tp != -1)
-               return use_ms_tp;
-       else if (!(mono_threadpool_env = g_getenv ("MONO_THREADPOOL")))
-               return use_ms_tp = FALSE;
-       else if (strcmp (mono_threadpool_env, "microsoft") == 0)
-               return use_ms_tp = TRUE;
-       else
-               return use_ms_tp = FALSE;
-}
-
-#define THREAD_WANTS_A_BREAK(t) ((t->state & (ThreadState_StopRequested | \
-                                               ThreadState_SuspendRequested)) != 0)
-
-/* DEBUG: prints tp data every 2s */
-#undef DEBUG 
-
-/* mono_thread_pool_init called */
-static volatile int tp_inited;
-
-enum {
-       POLL_BACKEND,
-       EPOLL_BACKEND,
-       KQUEUE_BACKEND
-};
-
-enum {
-       MONITOR_STATE_AWAKE,
-       MONITOR_STATE_FALLING_ASLEEP,
-       MONITOR_STATE_SLEEPING
-};
-
-static SocketIOData socket_io_data;
+#include <mono/utils/mono-compiler.h>
+#include <mono/utils/mono-complex.h>
+#include <mono/utils/mono-lazy-init.h>
+#include <mono/utils/mono-logger.h>
+#include <mono/utils/mono-logger-internals.h>
+#include <mono/utils/mono-proclib.h>
+#include <mono/utils/mono-threads.h>
+#include <mono/utils/mono-time.h>
+#include <mono/utils/refcount.h>
+#include <mono/utils/mono-os-wait.h>
 
 typedef struct {
-       MonoSemType lock;
-       MonoCQ *queue; /* GC root */
-       MonoSemType new_job;
-       volatile gint waiting; /* threads waiting for a work item */
-
-       /**/
-       volatile gint pool_status; /* 0 -> not initialized, 1 -> initialized, 2 -> cleaning up */
-       /* min, max, n and busy -> Interlocked */
-       volatile gint min_threads;
-       volatile gint max_threads;
-       volatile gint nthreads;
-       volatile gint busy_threads;
-
-       void (*async_invoke) (gpointer data);
-       void *pc_nitems; /* Performance counter for total number of items in added */
-       void *pc_nthreads; /* Performance counter for total number of active threads */
-       /**/
-       volatile gint destroy_thread;
-#if DEBUG
-       volatile gint32 njobs;
-#endif
-       volatile gint32 nexecuted;
-       gboolean is_io;
-} ThreadPool;
+       MonoDomain *domain;
+       /* Number of outstanding jobs */
+       gint32 outstanding_request;
+       /* Number of currently executing jobs */
+       gint32 threadpool_jobs;
+       /* Signalled when threadpool_jobs + outstanding_request is 0 */
+       /* Protected by threadpool.domains_lock */
+       MonoCoopCond cleanup_cond;
+} ThreadPoolDomain;
+
+typedef union { /* two 16-bit worker counts overlaid with a single gint32 */
+       struct {
+               gint16 starting; /* starting, but not yet in worker_callback */
+               gint16 working; /* executing worker_callback */
+       } _;
+       gint32 as_gint32; /* whole-word view; COUNTER_READ and COUNTER_ATOMIC access the counters through this field */
+} ThreadPoolCounter;
 
-static ThreadPool async_tp;
-static ThreadPool async_io_tp;
-
-static void async_invoke_thread (gpointer data);
-static MonoObject *mono_async_invoke (ThreadPool *tp, MonoAsyncResult *ares);
-static void threadpool_free_queue (ThreadPool *tp);
-static void threadpool_append_job (ThreadPool *tp, MonoObject *ar);
-static void threadpool_append_jobs (ThreadPool *tp, MonoObject **jobs, gint njobs);
-static void threadpool_init (ThreadPool *tp, int min_threads, int max_threads, void (*async_invoke) (gpointer));
-static void threadpool_start_idle_threads (ThreadPool *tp);
-static void threadpool_kill_idle_threads (ThreadPool *tp);
-static gboolean threadpool_start_thread (ThreadPool *tp);
-static void threadpool_kill_thread (ThreadPool *tp);
-static void monitor_thread (gpointer data);
-static int get_event_from_state (MonoSocketAsyncResult *state);
-
-static MonoClass *async_call_klass;
-static MonoClass *socket_async_call_klass;
-static MonoClass *process_async_call_klass;
-
-static GPtrArray *threads;
-mono_mutex_t threads_lock;
-static GPtrArray *wsqs;
-mono_mutex_t wsqs_lock;
-static gboolean suspended;
-
-static volatile gint32 monitor_njobs = 0;
-static volatile gint32 monitor_state;
-static MonoSemType monitor_sem;
-static MonoInternalThread *monitor_internal_thread;
-
-/* Hooks */
-static MonoThreadPoolFunc tp_start_func;
-static MonoThreadPoolFunc tp_finish_func;
-static gpointer tp_hooks_user_data;
-static MonoThreadPoolItemFunc tp_item_begin_func;
-static MonoThreadPoolItemFunc tp_item_end_func;
-static gpointer tp_item_user_data;
-
-enum {
-       AIO_OP_FIRST,
-       AIO_OP_ACCEPT = 0,
-       AIO_OP_CONNECT,
-       AIO_OP_RECEIVE,
-       AIO_OP_RECEIVEFROM,
-       AIO_OP_SEND,
-       AIO_OP_SENDTO,
-       AIO_OP_RECV_JUST_CALLBACK,
-       AIO_OP_SEND_JUST_CALLBACK,
-       AIO_OP_READPIPE,
-       AIO_OP_CONSOLE2,
-       AIO_OP_DISCONNECT,
-       AIO_OP_ACCEPTRECEIVE,
-       AIO_OP_RECEIVE_BUFFERS,
-       AIO_OP_SEND_BUFFERS,
-       AIO_OP_LAST
-};
-
-// #include <mono/metadata/tpool-poll.c>
-gpointer tp_poll_init (SocketIOData *data);
-
-#ifdef HAVE_EPOLL
-#include <mono/metadata/tpool-epoll.c>
-#elif defined(USE_KQUEUE_FOR_THREADPOOL)
-#include <mono/metadata/tpool-kqueue.c>
-#endif
-/*
- * Functions to check whenever a class is given system class. We need to cache things in MonoDomain since some of the
- * assemblies can be unloaded.
- */
+typedef struct {
+       MonoRefCount ref;
 
-static gboolean
-is_system_type (MonoDomain *domain, MonoClass *klass)
-{
-       if (domain->system_image == NULL)
-               domain->system_image = mono_image_loaded ("System");
+       GPtrArray *domains; // ThreadPoolDomain* []
+       MonoCoopMutex domains_lock;
 
-       return klass->image == domain->system_image;
-}
+       ThreadPoolCounter counters;
 
-static gboolean
-is_corlib_type (MonoDomain *domain, MonoClass *klass)
-{
-       return klass->image == mono_defaults.corlib;
-}
+       gint32 limit_io_min;
+       gint32 limit_io_max;
+} ThreadPool;
 
-#define check_type_cached(domain, ASSEMBLY, _class, _namespace, _name, loc) do { \
-       if (*loc) \
-               return *loc == _class; \
-       if (is_##ASSEMBLY##_type (domain, _class) && !strcmp (_name, _class->name) && !strcmp (_namespace, _class->name_space)) { \
-               *loc = _class; \
-               return TRUE; \
-       } \
-       return FALSE; \
-} while (0) \
+static mono_lazy_init_t status = MONO_LAZY_INIT_STATUS_NOT_INITIALIZED;
 
-#define check_corlib_type_cached(domain, _class, _namespace, _name, loc) check_type_cached (domain, corlib, _class, _namespace, _name, loc)
+static ThreadPool threadpool;
 
-#define check_system_type_cached(domain, _class, _namespace, _name, loc) check_type_cached (domain, system, _class, _namespace, _name, loc)
+#define COUNTER_ATOMIC(var,block) \
+       do { \
+               ThreadPoolCounter __old; \
+               do { \
+                       (var) = __old = COUNTER_READ (); \
+                       { block; } \
+                       if (!(counter._.starting >= 0)) \
+                               g_error ("%s: counter._.starting = %d, but should be >= 0", __func__, counter._.starting); \
+                       if (!(counter._.working >= 0)) \
+                               g_error ("%s: counter._.working = %d, but should be >= 0", __func__, counter._.working); \
+               } while (InterlockedCompareExchange (&threadpool.counters.as_gint32, (var).as_gint32, __old.as_gint32) != __old.as_gint32); \
+       } while (0)
 
-static gboolean
-is_corlib_asyncresult (MonoDomain *domain, MonoClass *klass)
+static inline ThreadPoolCounter
+COUNTER_READ (void) /* returns a by-value snapshot of threadpool.counters */
 {
-       check_corlib_type_cached (domain, klass, "System.Runtime.Remoting.Messaging", "AsyncResult", &domain->corlib_asyncresult_class);
+       ThreadPoolCounter counter;
+       counter.as_gint32 = InterlockedRead (&threadpool.counters.as_gint32); /* a single 32-bit read picks up both starting and working together */
+       return counter;
 }
 
-static gboolean
-is_socketasyncresult (MonoDomain *domain, MonoClass *klass)
+static inline void
+domains_lock (void)
 {
-       static MonoClass *socket_async_result_klass = NULL;
-       check_system_type_cached (domain, klass, "System.Net.Sockets", "SocketAsyncResult", &socket_async_result_klass);
+       mono_coop_mutex_lock (&threadpool.domains_lock);
 }
 
-static gboolean
-is_socketasynccall (MonoDomain *domain, MonoClass *klass)
+static inline void
+domains_unlock (void)
 {
-       static MonoClass *socket_async_callback_klass = NULL;
-       check_system_type_cached (domain, klass, "System.Net.Sockets", "SocketAsyncCallback", &socket_async_callback_klass);
+       mono_coop_mutex_unlock (&threadpool.domains_lock);
 }
 
-static gboolean
-is_appdomainunloaded_exception (MonoDomain *domain, MonoClass *klass)
+static void
+destroy (gpointer unused) /* callback passed to mono_refcount_init in initialize (); the argument is ignored */
 {
-       check_corlib_type_cached (domain, klass, "System", "AppDomainUnloadedException", &domain->ad_unloaded_ex_class);
+       g_ptr_array_free (threadpool.domains, TRUE); /* frees the array and its pointer storage; the array was created with g_ptr_array_new (), i.e. without an element free function, so ThreadPoolDomain entries are not freed here */
+       mono_coop_mutex_destroy (&threadpool.domains_lock);
 }
 
-static gboolean
-is_sd_process (MonoDomain *domain, MonoClass *klass)
-{
-       check_system_type_cached (domain, klass, "System.Diagnostics", "Process", &domain->process_class);
-}
+static void
+worker_callback (void);
 
-static gboolean
-is_sdp_asyncreadhandler (MonoDomain *domain, MonoClass *klass)
+static void
+initialize (void) /* sets up the global `threadpool` state, then starts the worker layer */
 {
+       g_assert (sizeof (ThreadPoolCounter) == sizeof (gint32)); /* the counters are read and compare-exchanged through the gint32 view, so the union must be exactly that size */
 
-       return (klass->nested_in &&
-                       is_sd_process (domain, klass->nested_in) &&
-               !strcmp (klass->name, "AsyncReadHandler"));
-}
+       mono_refcount_init (&threadpool, destroy); /* destroy is registered as the refcount callback */
 
+       threadpool.domains = g_ptr_array_new ();
+       mono_coop_mutex_init (&threadpool.domains_lock);
 
-#ifdef DISABLE_SOCKETS
+       threadpool.limit_io_min = mono_cpu_count ();
+       threadpool.limit_io_max = CLAMP (threadpool.limit_io_min * 100, MIN (threadpool.limit_io_min, 200), MAX (threadpool.limit_io_min, 200)); /* 100 per CPU, bounded between min (cpus, 200) and max (cpus, 200): 1 CPU -> 100, 4 CPUs -> 200, 300 CPUs -> 300 */
 
-void
-socket_io_cleanup (SocketIOData *data)
-{
+       mono_threadpool_worker_init (worker_callback); /* worker_callback is handed to the worker layer as its callback */
 }
 
-static int
-get_event_from_state (MonoSocketAsyncResult *state)
+static void
+cleanup (void) /* counterpart of initialize (): shuts the worker layer down first, then drops a reference on `threadpool` */
 {
-       g_assert_not_reached ();
-       return -1;
+       mono_threadpool_worker_cleanup ();
+
+       mono_refcount_dec (&threadpool); /* presumably releases the initial reference from mono_refcount_init, letting destroy run once worker_callback holders are gone — TODO confirm against mono/utils/refcount.h */
 }
 
-int
-get_events_from_list (MonoMList *list)
+gboolean
+mono_threadpool_enqueue_work_item (MonoDomain *domain, MonoObject *work_item, MonoError *error) /* invokes the managed System.Threading.ThreadPool.UnsafeQueueCustomWorkItem (work_item, FALSE) with `domain` as the current domain; returns FALSE only when the invoke reports an error */
 {
-       return 0;
-}
+       static MonoClass *threadpool_class = NULL;
+       static MonoMethod *unsafe_queue_custom_work_item_method = NULL;
+       MonoDomain *current_domain;
+       MonoBoolean f; /* second argument of UnsafeQueueCustomWorkItem; always FALSE here */
+       gpointer args [2];
 
-#else
+       error_init (error);
+       g_assert (work_item);
 
-void
-socket_io_cleanup (SocketIOData *data)
-{
-       mono_mutex_lock (&data->io_lock);
-       if (data->inited != 2) {
-               mono_mutex_unlock (&data->io_lock);
-               return;
-       }
-       data->inited = 3;
-       data->shutdown (data->event_data);
-       mono_mutex_unlock (&data->io_lock);
-}
+       if (!threadpool_class) /* class and method lookups are cached in function-level statics */
+               threadpool_class = mono_class_load_from_name (mono_defaults.corlib, "System.Threading", "ThreadPool");
 
-static int
-get_event_from_state (MonoSocketAsyncResult *state)
-{
-       switch (state->operation) {
-       case AIO_OP_ACCEPT:
-       case AIO_OP_RECEIVE:
-       case AIO_OP_RECV_JUST_CALLBACK:
-       case AIO_OP_RECEIVEFROM:
-       case AIO_OP_READPIPE:
-       case AIO_OP_ACCEPTRECEIVE:
-       case AIO_OP_RECEIVE_BUFFERS:
-               return MONO_POLLIN;
-       case AIO_OP_SEND:
-       case AIO_OP_SEND_JUST_CALLBACK:
-       case AIO_OP_SENDTO:
-       case AIO_OP_CONNECT:
-       case AIO_OP_SEND_BUFFERS:
-       case AIO_OP_DISCONNECT:
-               return MONO_POLLOUT;
-       default: /* Should never happen */
-               g_message ("get_event_from_state: unknown value in switch!!!");
-               return 0;
-       }
-}
+       if (!unsafe_queue_custom_work_item_method)
+               unsafe_queue_custom_work_item_method = mono_class_get_method_from_name (threadpool_class, "UnsafeQueueCustomWorkItem", 2);
+       g_assert (unsafe_queue_custom_work_item_method);
 
-int
-get_events_from_list (MonoMList *list)
-{
-       MonoSocketAsyncResult *state;
-       int events = 0;
+       f = FALSE;
 
-       while (list && (state = (MonoSocketAsyncResult *)mono_mlist_get_data (list))) {
-               events |= get_event_from_state (state);
-               list = mono_mlist_next (list);
-       }
+       args [0] = (gpointer) work_item;
+       args [1] = (gpointer) &f;
 
-       return events;
+       current_domain = mono_domain_get ();
+       if (current_domain == domain) { /* already in the target domain: invoke directly */
+               mono_runtime_invoke_checked (unsafe_queue_custom_work_item_method, NULL, args, error);
+               return_val_if_nok (error, FALSE);
+       } else {
+               mono_thread_push_appdomain_ref (domain); /* matched by a pop on every exit path below */
+               if (mono_domain_set (domain, FALSE)) {
+                       mono_runtime_invoke_checked (unsafe_queue_custom_work_item_method, NULL, args, error);
+                       if (!is_ok (error)) { /* NOTE(review): this path returns without mono_domain_set (current_domain, TRUE), unlike the success path — confirm callers expect to stay in `domain` on error */
+                               mono_thread_pop_appdomain_ref ();
+                               return FALSE;
+                       }
+                       mono_domain_set (current_domain, TRUE);
+               } /* NOTE(review): when mono_domain_set fails nothing is invoked, yet the function still returns TRUE below — confirm this is intended */
+               mono_thread_pop_appdomain_ref ();
+       }
+       return TRUE;
 }
 
-#define ICALL_RECV(x)  ves_icall_System_Net_Sockets_Socket_Receive_internal (\
-                               (SOCKET)(gssize)x->handle, x->buffer, x->offset, x->size,\
-                                x->socket_flags, &x->error);
+/* Allocates a zero-initialised ThreadPoolDomain for `domain`, appends it to threadpool.domains and returns it. LOCKING: domains_lock must be held. */
+static ThreadPoolDomain *
+tpdomain_create (MonoDomain *domain)
+{
+       ThreadPoolDomain *tpdomain;
 
-#define ICALL_SEND(x)  ves_icall_System_Net_Sockets_Socket_Send_internal (\
-                               (SOCKET)(gssize)x->handle, x->buffer, x->offset, x->size,\
-                                x->socket_flags, &x->error);
+       tpdomain = g_new0 (ThreadPoolDomain, 1); /* zeroed, so outstanding_request and threadpool_jobs start at 0 */
+       tpdomain->domain = domain;
+       mono_coop_cond_init (&tpdomain->cleanup_cond);
 
-#endif /* !DISABLE_SOCKETS */
+       g_ptr_array_add (threadpool.domains, tpdomain); /* the shared array is why the caller must hold domains_lock */
 
-static void
-threadpool_jobs_inc (MonoObject *obj)
-{
-       if (obj)
-               InterlockedIncrement (&obj->vtable->domain->threadpool_jobs);
+       return tpdomain;
 }
 
+/* LOCKING: domains_lock must be held. */
 static gboolean
-threadpool_jobs_dec (MonoObject *obj)
+tpdomain_remove (ThreadPoolDomain *tpdomain)
 {
-       MonoDomain *domain;
-       int remaining_jobs;
-
-       if (obj == NULL)
-               return FALSE;
-
-       domain = obj->vtable->domain;
-       remaining_jobs = InterlockedDecrement (&domain->threadpool_jobs);
-       if (remaining_jobs == 0 && domain->cleanup_semaphore) {
-               ReleaseSemaphore (domain->cleanup_semaphore, 1, NULL);
-               return TRUE;
-       }
-       return FALSE;
+       g_assert (tpdomain);
+       return g_ptr_array_remove (threadpool.domains, tpdomain);
 }
 
-MonoObject *
-get_io_event (MonoMList **list, gint event)
+/* LOCKING: domains_lock must be held */
+static ThreadPoolDomain *
+tpdomain_get (MonoDomain *domain)
 {
-       MonoObject *state;
-       MonoMList *current;
-       MonoMList *prev;
-
-       current = *list;
-       prev = NULL;
-       state = NULL;
-       while (current) {
-               state = mono_mlist_get_data (current);
-               if (get_event_from_state ((MonoSocketAsyncResult *) state) == event)
-                       break;
+       gint i;
 
-               state = NULL;
-               prev = current;
-               current = mono_mlist_next (current);
-       }
+       g_assert (domain);
 
-       if (current) {
-               if (prev) {
-                       mono_mlist_set_next (prev, mono_mlist_next (current));
-               } else {
-                       *list = mono_mlist_next (*list);
-               }
+       for (i = 0; i < threadpool.domains->len; ++i) {
+               ThreadPoolDomain *tpdomain;
+
+               tpdomain = (ThreadPoolDomain *)g_ptr_array_index (threadpool.domains, i);
+               if (tpdomain->domain == domain)
+                       return tpdomain;
        }
 
-       return state;
+       return NULL;
 }
 
-/*
- * select/poll wake up when a socket is closed, but epoll just removes
- * the socket from its internal list without notification.
- */
-void
-mono_thread_pool_remove_socket (int sock)
+static void
+tpdomain_free (ThreadPoolDomain *tpdomain) /* releases the memory allocated by tpdomain_create; does not remove the entry from threadpool.domains */
 {
-       MonoMList *list;
-       MonoSocketAsyncResult *state;
-       MonoObject *ares;
-
-       if (use_ms_threadpool ()) {
-#ifndef DISABLE_SOCKETS
-               mono_threadpool_ms_io_remove_socket (sock);
-#endif
-               return;
-       }
+       g_free (tpdomain); /* NOTE(review): cleanup_cond, initialised in tpdomain_create, is not destroyed before the memory is released — confirm whether it needs an explicit destroy */
+}
 
-       if (socket_io_data.inited == 0)
-               return;
+/* Round-robin pick: returns the first entry after `current` in threadpool.domains (wrapping around, with `current` itself checked last) whose outstanding_request is > 0, or NULL if there is none. LOCKING: domains_lock must be held */
+static ThreadPoolDomain *
+tpdomain_get_next (ThreadPoolDomain *current)
+{
+       ThreadPoolDomain *tpdomain = NULL;
+       gint len;
 
-       mono_mutex_lock (&socket_io_data.io_lock);
-       if (socket_io_data.sock_to_state == NULL) {
-               mono_mutex_unlock (&socket_io_data.io_lock);
-               return;
-       }
-       list = mono_g_hash_table_lookup (socket_io_data.sock_to_state, GINT_TO_POINTER (sock));
-       if (list)
-               mono_g_hash_table_remove (socket_io_data.sock_to_state, GINT_TO_POINTER (sock));
-       mono_mutex_unlock (&socket_io_data.io_lock);
-       
-       while (list) {
-               state = (MonoSocketAsyncResult *) mono_mlist_get_data (list);
-               if (state->operation == AIO_OP_RECEIVE)
-                       state->operation = AIO_OP_RECV_JUST_CALLBACK;
-               else if (state->operation == AIO_OP_SEND)
-                       state->operation = AIO_OP_SEND_JUST_CALLBACK;
-
-               ares = get_io_event (&list, MONO_POLLIN);
-               threadpool_append_job (&async_io_tp, ares);
-               if (list) {
-                       ares = get_io_event (&list, MONO_POLLOUT);
-                       threadpool_append_job (&async_io_tp, ares);
+       len = threadpool.domains->len;
+       if (len > 0) {
+               gint i, current_idx = -1; /* -1 makes the scan below start at index 0 when current is NULL or is not in the array */
+               if (current) {
+                       for (i = 0; i < len; ++i) {
+                               if (current == g_ptr_array_index (threadpool.domains, i)) {
+                                       current_idx = i;
+                                       break;
+                               }
+                       }
+               }
+               for (i = current_idx + 1; i < len + current_idx + 1; ++i) { /* visits exactly len entries, starting just after current_idx and wrapping via i % len */
+                       ThreadPoolDomain *tmp = (ThreadPoolDomain *)g_ptr_array_index (threadpool.domains, i % len);
+                       if (tmp->outstanding_request > 0) {
+                               tpdomain = tmp;
+                               break;
+                       }
                 }
         }
+
+       return tpdomain;
 }
 
-static void
-init_event_system (SocketIOData *data)
+static MonoObject*
+try_invoke_perform_wait_callback (MonoObject** exc, MonoError *error)
 {
-#ifdef HAVE_EPOLL
-       if (data->event_system == EPOLL_BACKEND) {
-               data->event_data = tp_epoll_init (data);
-               if (data->event_data == NULL) {
-                       if (g_getenv ("MONO_DEBUG"))
-                               g_message ("Falling back to poll()");
-                       data->event_system = POLL_BACKEND;
-               }
-       }
-#elif defined(USE_KQUEUE_FOR_THREADPOOL)
-       if (data->event_system == KQUEUE_BACKEND)
-               data->event_data = tp_kqueue_init (data);
-#endif
-       if (data->event_system == POLL_BACKEND)
-               data->event_data = tp_poll_init (data);
+       HANDLE_FUNCTION_ENTER ();
+       error_init (error);
+       MonoObject *res = mono_runtime_try_invoke (mono_defaults.threadpool_perform_wait_callback_method, NULL, NULL, exc, error);
+       HANDLE_FUNCTION_RETURN_VAL (res);
 }
 
 static void
-socket_io_init (SocketIOData *data)
+worker_callback (void)
 {
-       int inited;
+       MonoError error;
+       ThreadPoolDomain *tpdomain, *previous_tpdomain;
+       ThreadPoolCounter counter;
+       MonoInternalThread *thread;
 
-       if (data->inited >= 2) // 2 -> initialized, 3-> cleaned up
+       if (!mono_refcount_tryinc (&threadpool))
                return;
 
-       inited = InterlockedCompareExchange (&data->inited, 1, 0);
-       if (inited >= 1) {
-               while (TRUE) {
-                       if (data->inited >= 2)
-                               return;
-                       SleepEx (1, FALSE);
-               }
-       }
+       thread = mono_thread_internal_current ();
 
-       mono_mutex_lock (&data->io_lock);
-       data->sock_to_state = mono_g_hash_table_new_type (g_direct_hash, g_direct_equal, MONO_HASH_VALUE_GC);
-#ifdef HAVE_EPOLL
-       data->event_system = EPOLL_BACKEND;
-#elif defined(USE_KQUEUE_FOR_THREADPOOL)
-       data->event_system = KQUEUE_BACKEND;
-#else
-       data->event_system = POLL_BACKEND;
-#endif
-       if (g_getenv ("MONO_DISABLE_AIO") != NULL)
-               data->event_system = POLL_BACKEND;
-
-       init_event_system (data);
-       mono_thread_create_internal (mono_get_root_domain (), data->wait, data, TRUE, SMALL_STACK);
-       mono_mutex_unlock (&data->io_lock);
-       data->inited = 2;
-       threadpool_start_thread (&async_io_tp);
-}
+       COUNTER_ATOMIC (counter, {
+               if (!(counter._.working < 32767 /* G_MAXINT16 */))
+                       g_error ("%s: counter._.working = %d, but should be < 32767", __func__, counter._.working);
 
-static void
-socket_io_add (MonoAsyncResult *ares, MonoSocketAsyncResult *state)
-{
-       MonoMList *list;
-       SocketIOData *data = &socket_io_data;
-       int fd;
-       gboolean is_new;
-       int ievt;
-
-       socket_io_init (&socket_io_data);
-       if (mono_runtime_is_shutting_down () || data->inited == 3 || data->sock_to_state == NULL)
-               return;
-       if (async_tp.pool_status == 2)
-               return;
+               counter._.starting --;
+               counter._.working ++;
+       });
 
-       MONO_OBJECT_SETREF (state, ares, ares);
+       if (mono_runtime_is_shutting_down ()) {
+               COUNTER_ATOMIC (counter, {
+                       counter._.working --;
+               });
 
-       fd = GPOINTER_TO_INT (state->handle);
-       mono_mutex_lock (&data->io_lock);
-       if (data->sock_to_state == NULL) {
-               mono_mutex_unlock (&data->io_lock);
+               mono_refcount_dec (&threadpool);
                return;
        }
-       list = mono_g_hash_table_lookup (data->sock_to_state, GINT_TO_POINTER (fd));
-       if (list == NULL) {
-               list = mono_mlist_alloc ((MonoObject*)state);
-               is_new = TRUE;
-       } else {
-               list = mono_mlist_append (list, (MonoObject*)state);
-               is_new = FALSE;
-       }
 
-       mono_g_hash_table_replace (data->sock_to_state, state->handle, list);
-       ievt = get_events_from_list (list);
-       /* The modify function leaves the io_lock critical section. */
-       data->modify (data, fd, state->operation, ievt, is_new);
-}
+       /*
+        * This is needed so there is always an lmf frame in the runtime invoke call below,
+        * so ThreadAbortExceptions are caught even if the thread is in native code.
+        */
+       mono_defaults.threadpool_perform_wait_callback_method->save_lmf = TRUE;
 
-#ifndef DISABLE_SOCKETS
-static gboolean
-socket_io_filter (MonoObject *target, MonoObject *state)
-{
-       gint op;
-       MonoSocketAsyncResult *sock_res;
-       MonoClass *klass;
-       MonoDomain *domain;
+       domains_lock ();
 
-       if (target == NULL || state == NULL)
-               return FALSE;
+       previous_tpdomain = NULL;
 
-       domain = target->vtable->domain;
-       klass = target->vtable->klass;
-       if (socket_async_call_klass == NULL && is_socketasynccall (domain, klass))
-               socket_async_call_klass = klass;
+       while (!mono_runtime_is_shutting_down ()) {
+               gboolean retire = FALSE;
 
-       if (process_async_call_klass == NULL && is_sdp_asyncreadhandler (domain, klass))
-               process_async_call_klass = klass;
+               if (thread->state & (ThreadState_AbortRequested | ThreadState_SuspendRequested)) {
+                       domains_unlock ();
+                       if (mono_thread_interruption_checkpoint ()) {
+                               domains_lock ();
+                               continue;
+                       }
+                       domains_lock ();
+               }
 
-       if (klass != socket_async_call_klass && klass != process_async_call_klass)
-               return FALSE;
+               tpdomain = tpdomain_get_next (previous_tpdomain);
+               if (!tpdomain)
+                       break;
 
-       sock_res = (MonoSocketAsyncResult *) state;
-       op = sock_res->operation;
-       if (op < AIO_OP_FIRST || op >= AIO_OP_LAST)
-               return FALSE;
+               tpdomain->outstanding_request --;
+               g_assert (tpdomain->outstanding_request >= 0);
 
-       return TRUE;
-}
-#endif /* !DISABLE_SOCKETS */
+               mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_THREADPOOL, "[%p] worker running in domain %p (outstanding requests %d)",
+                       GUINT_TO_POINTER (MONO_NATIVE_THREAD_ID_TO_UINT (mono_native_thread_id_get ())), tpdomain->domain, tpdomain->outstanding_request);
 
-/* Returns the exception thrown when invoking, if any */
-static MonoObject *
-mono_async_invoke (ThreadPool *tp, MonoAsyncResult *ares)
-{
-       MonoObject *exc = NULL;
+               g_assert (tpdomain->threadpool_jobs >= 0);
+               tpdomain->threadpool_jobs ++;
 
-       mono_async_result_invoke (ares, &exc);
+               domains_unlock ();
 
-#if DEBUG
-       InterlockedDecrement (&tp->njobs);
-#endif
-       if (!tp->is_io)
-               InterlockedIncrement (&tp->nexecuted);
+               MonoString *thread_name = mono_string_new_checked (mono_get_root_domain (), "Threadpool worker", &error);
+               mono_error_assert_ok (&error);
+               mono_thread_set_name_internal (thread, thread_name, FALSE, TRUE, &error);
+               mono_error_assert_ok (&error);
 
-       if (InterlockedDecrement (&monitor_njobs) == 0)
-               monitor_state = MONITOR_STATE_FALLING_ASLEEP;
+               mono_thread_clr_state (thread, (MonoThreadState)~ThreadState_Background);
+               if (!mono_thread_test_state (thread , ThreadState_Background))
+                       ves_icall_System_Threading_Thread_SetState (thread, ThreadState_Background);
 
-       return exc;
-}
+               mono_thread_push_appdomain_ref (tpdomain->domain);
+               if (mono_domain_set (tpdomain->domain, FALSE)) {
+                       MonoObject *exc = NULL, *res;
 
-static void
-threadpool_start_idle_threads (ThreadPool *tp)
-{
-       int n;
-       guint32 stack_size;
-
-       stack_size = (!tp->is_io) ? 0 : SMALL_STACK;
-       do {
-               while (1) {
-                       n = tp->nthreads;
-                       if (n >= tp->min_threads)
-                               return;
-                       if (InterlockedCompareExchange (&tp->nthreads, n + 1, n) == n)
-                               break;
+                       res = try_invoke_perform_wait_callback (&exc, &error);
+                       if (exc || !mono_error_ok(&error)) {
+                               if (exc == NULL)
+                                       exc = (MonoObject *) mono_error_convert_to_exception (&error);
+                               else
+                                       mono_error_cleanup (&error);
+                               mono_thread_internal_unhandled_exception (exc);
+                       } else if (res && *(MonoBoolean*) mono_object_unbox (res) == FALSE) {
+                               retire = TRUE;
+                       }
+
+                       mono_domain_set (mono_get_root_domain (), TRUE);
                }
-#ifndef DISABLE_PERFCOUNTERS
-               mono_perfcounter_update_value (tp->pc_nthreads, TRUE, 1);
-#endif
-               mono_thread_create_internal (mono_get_root_domain (), tp->async_invoke, tp, TRUE, stack_size);
-               SleepEx (100, TRUE);
-       } while (1);
-}
+               mono_thread_pop_appdomain_ref ();
 
-static void
-threadpool_init (ThreadPool *tp, int min_threads, int max_threads, void (*async_invoke) (gpointer))
-{
-       memset (tp, 0, sizeof (ThreadPool));
-       tp->min_threads = min_threads;
-       tp->max_threads = max_threads;
-       tp->async_invoke = async_invoke;
-       tp->queue = mono_cq_create ();
-       MONO_SEM_INIT (&tp->new_job, 0);
-}
+               domains_lock ();
 
-#ifndef DISABLE_PERFCOUNTERS
-static void *
-init_perf_counter (const char *category, const char *counter)
-{
-       MonoString *category_str;
-       MonoString *counter_str;
-       MonoString *machine;
-       MonoDomain *root;
-       MonoBoolean custom;
-       int type;
-
-       if (category == NULL || counter == NULL)
-               return NULL;
-       root = mono_get_root_domain ();
-       category_str = mono_string_new (root, category);
-       counter_str = mono_string_new (root, counter);
-       machine = mono_string_new (root, ".");
-       return mono_perfcounter_get_impl (category_str, counter_str, NULL, machine, &type, &custom);
-}
-#endif
+               tpdomain->threadpool_jobs --;
+               g_assert (tpdomain->threadpool_jobs >= 0);
 
-#ifdef DEBUG
-static void
-print_pool_info (ThreadPool *tp)
-{
+               if (tpdomain->outstanding_request + tpdomain->threadpool_jobs == 0 && mono_domain_is_unloading (tpdomain->domain)) {
+                       gboolean removed;
+
+                       removed = tpdomain_remove (tpdomain);
+                       g_assert (removed);
 
-//     if (tp->tail - tp->head == 0)
-//             return;
-
-       g_print ("Pool status? %d\n", InterlockedCompareExchange (&tp->pool_status, 0, 0));
-       g_print ("Min. threads: %d\n", InterlockedCompareExchange (&tp->min_threads, 0, 0));
-       g_print ("Max. threads: %d\n", InterlockedCompareExchange (&tp->max_threads, 0, 0));
-       g_print ("nthreads: %d\n", InterlockedCompareExchange (&tp->nthreads, 0, 0));
-       g_print ("busy threads: %d\n", InterlockedCompareExchange (&tp->busy_threads, 0, 0));
-       g_print ("Waiting: %d\n", InterlockedCompareExchange (&tp->waiting, 0, 0));
-       g_print ("Queued: %d\n", (tp->tail - tp->head));
-       if (tp == &async_tp) {
-               int i;
-               mono_mutex_lock (&wsqs_lock);
-               for (i = 0; i < wsqs->len; i++) {
-                       g_print ("\tWSQ %d: %d\n", i, mono_wsq_count (g_ptr_array_index (wsqs, i)));
+                       mono_coop_cond_signal (&tpdomain->cleanup_cond);
+                       tpdomain = NULL;
                }
-               mono_mutex_unlock (&wsqs_lock);
-       } else {
-               g_print ("\tSockets: %d\n", mono_g_hash_table_size (socket_io_data.sock_to_state));
+
+               if (retire)
+                       break;
+
+               previous_tpdomain = tpdomain;
        }
-       g_print ("-------------\n");
+
+       domains_unlock ();
+
+       COUNTER_ATOMIC (counter, {
+               counter._.working --;
+       });
+
+       mono_refcount_dec (&threadpool);
 }
 
-static void
-signal_handler (int signo)
+/**
+ * Tears the thread pool down. When sockets are compiled in
+ * (DISABLE_SOCKETS not defined) the IO part is cleaned up first through
+ * mono_threadpool_io_cleanup (); afterwards `cleanup` is run through
+ * mono_lazy_cleanup () on `status`.
+ */
+void
+mono_threadpool_cleanup (void)
 {
-       ThreadPool *tp;
-
-       tp = &async_tp;
-       g_print ("\n-----Non-IO-----\n");
-       print_pool_info (tp);
-       tp = &async_io_tp;
-       g_print ("\n-----IO-----\n");
-       print_pool_info (tp);
-       alarm (2);
-}
+#ifndef DISABLE_SOCKETS
+       mono_threadpool_io_cleanup ();
 #endif
+       mono_lazy_cleanup (&status, cleanup);
+}
 
-#define SAMPLES_PERIOD 500
-#define HISTORY_SIZE 10
-/* number of iteration without any jobs
-   in the queue before going to sleep */
-#define NUM_WAITING_ITERATIONS 10
-
-typedef struct {
-       gint32 nexecuted;
-       gint32 nthreads;
-       gint8 nthreads_diff;
-} SamplesHistory;
-
-/*
- * returns :
- *  -  1 if the number of threads should increase
- *  -  0 if it should not change
- *  - -1 if it should decrease
- *  - -2 in case of error
- */
-static gint8
-monitor_heuristic (gint16 *current, gint16 *history_size, SamplesHistory *history, ThreadPool *tp)
+/**
+ * Queues an asynchronous invocation of \p method on the thread pool.
+ *
+ * A method-call message is built from \p method and \p params, stored in a
+ * freshly allocated System.MonoAsyncCall, wrapped in a MonoAsyncResult whose
+ * async_delegate is \p target, and handed to
+ * mono_threadpool_enqueue_work_item ().
+ *
+ * When \p params is NULL, no callback delegate and no state object are
+ * requested from mono_method_call_message_new () (NULL is passed for both
+ * out slots), so both stay NULL.
+ *
+ * \returns the new MonoAsyncResult, or NULL with \p error set when any of
+ * the steps fails.
+ */
+MonoAsyncResult *
+mono_threadpool_begin_invoke (MonoDomain *domain, MonoObject *target, MonoMethod *method, gpointer *params, MonoError *error)
 {
-       int i;
-       gint8 decision G_GNUC_UNUSED;
-       gint16 cur, max = 0;
-       gboolean all_waitsleepjoin;
-       MonoInternalThread *thread;
+       static MonoClass *async_call_klass = NULL;
+       MonoMethodMessage *message;
+       MonoAsyncResult *async_result;
+       MonoAsyncCall *async_call;
+       MonoDelegate *async_callback = NULL;
+       MonoObject *state = NULL;
 
-       /*
-        * The following heuristic tries to approach the optimal number of threads to maximize jobs throughput. To
-        * achieve this, it simply stores the number of jobs executed (nexecuted), the number of Threads (nthreads)
-        * and the decision (nthreads_diff) for the past HISTORY_SIZE periods of time, each period being of
-        * duration SAMPLES_PERIOD ms. This history gives us an insight into what happened, and to see if we should
-        * increase or reduce the number of threads by comparing the last period (current) to the best one.
-        *
-        * The algorithm can be describe as following :
-        *  - if we have a better throughput than the best period : we should either increase the number of threads
-        *     in case we already have more threads, either reduce the number of threads if we have less threads; this
-        *     is equivalent to move away from the number of threads of the best period, because we are currently better
-        *  - if we have a worse throughput than the best period : we should either decrease the number of threads if
-        *     we have more threads, either increase the number of threads if we have less threads;  this is equivalent
-        *     to get closer to the number of threads of the best period, because we are currently worse
-        */
+       /* Looked up once and cached in a function-local static. */
+       if (!async_call_klass)
+               async_call_klass = mono_class_load_from_name (mono_defaults.corlib, "System", "MonoAsyncCall");
 
-       *history_size = MIN (*history_size + 1, HISTORY_SIZE);
-       cur = *current = (*current + 1) % *history_size;
-
-       history [cur].nthreads = tp->nthreads;
-       history [cur].nexecuted = InterlockedExchange (&tp->nexecuted, 0);
-
-       if (tp->waiting) {
-               /* if we have waiting thread in the pool, then do not create a new one */
-               history [cur].nthreads_diff = tp->waiting > 1 ? -1 : 0;
-               decision = 0;
-       } else if (tp->nthreads < tp->min_threads) {
-               history [cur].nthreads_diff = 1;
-               decision = 1;
-       } else if (*history_size <= 1) {
-               /* first iteration, let's add a thread by default */
-               history [cur].nthreads_diff = 1;
-               decision = 2;
-       } else {
-               mono_mutex_lock (&threads_lock);
-               if (threads == NULL) {
-                       mono_mutex_unlock (&threads_lock);
-                       return -2;
-               }
-               all_waitsleepjoin = TRUE;
-               for (i = 0; i < threads->len; ++i) {
-                       thread = g_ptr_array_index (threads, i);
-                       if (!(thread->state & ThreadState_WaitSleepJoin)) {
-                               all_waitsleepjoin = FALSE;
-                               break;
-                       }
-               }
-               mono_mutex_unlock (&threads_lock);
+       error_init (error);
 
-               if (all_waitsleepjoin) {
-                       /* we might be in a condition of starvation/deadlock with tasks waiting for each others */
-                       history [cur].nthreads_diff = 1;
-                       decision = 5;
-               } else {
-                       max = cur == 0 ? 1 : 0;
-                       for (i = 0; i < *history_size; i++) {
-                               if (i == cur)
-                                       continue;
-                               if (history [i].nexecuted > history [max].nexecuted)
-                                       max = i;
-                       }
+       message = mono_method_call_message_new (method, params, mono_get_delegate_invoke (method->klass), (params != NULL) ? (&async_callback) : NULL, (params != NULL) ? (&state) : NULL, error);
+       return_val_if_nok (error, NULL);
 
-                       if (history [cur].nexecuted >= history [max].nexecuted) {
-                               /* we improved the situation, let's continue ! */
-                               history [cur].nthreads_diff = history [cur].nthreads >= history [max].nthreads ? 1 : -1;
-                               decision = 3;
-                       } else {
-                               /* we made it worse, let's return to previous situation */
-                               history [cur].nthreads_diff = history [cur].nthreads >= history [max].nthreads ? -1 : 1;
-                               decision = 4;
-                       }
-               }
+       async_call = (MonoAsyncCall*) mono_object_new_checked (domain, async_call_klass, error);
+       return_val_if_nok (error, NULL);
+
+       MONO_OBJECT_SETREF (async_call, msg, message);
+       MONO_OBJECT_SETREF (async_call, state, state);
+
+       /* The completion callback is only recorded when the message carried one. */
+       if (async_callback) {
+               MONO_OBJECT_SETREF (async_call, cb_method, mono_get_delegate_invoke (((MonoObject*) async_callback)->vtable->klass));
+               MONO_OBJECT_SETREF (async_call, cb_target, async_callback);
        }
 
-#if DEBUG
-       printf ("monitor_thread: decision: %1d, history [current]: {nexecuted: %5d, nthreads: %3d, waiting: %2d, nthreads_diff: %2d}, history [max]: {nexecuted: %5d, nthreads: %3d}\n",
-                       decision, history [cur].nexecuted, history [cur].nthreads, tp->waiting, history [cur].nthreads_diff, history [max].nexecuted, history [max].nthreads);
-#endif
-       
-       return history [cur].nthreads_diff;
+       async_result = mono_async_result_new (domain, NULL, async_call->state, NULL, (MonoObject*) async_call, error);
+       return_val_if_nok (error, NULL);
+       MONO_OBJECT_SETREF (async_result, async_delegate, target);
+
+       mono_threadpool_enqueue_work_item (domain, (MonoObject*) async_result, error);
+       return_val_if_nok (error, NULL);
+
+       return async_result;
 }
 
-static void
-monitor_thread (gpointer unused)
+/**
+ * Completes an asynchronous invocation started through the thread pool.
+ *
+ * Under the monitor of \p ares the call is marked as ended; a second call on
+ * the same \p ares fails with an invalid-operation error. If the work item
+ * has not completed yet, the wait handle stored in \p ares is used (one is
+ * created and stored when missing), the monitor is released, and the thread
+ * blocks on that handle inside a GC-safe region.
+ *
+ * \param ares the async result of the invocation
+ * \param out_args receives the out arguments of the call; must not be NULL
+ * \param exc receives the exception recorded in the call message; must not be NULL
+ * \param error set on failure
+ * \returns the result object of the call, or NULL with \p error set. On
+ * every exit path the monitor on \p ares is released.
+ */
+MonoObject *
+mono_threadpool_end_invoke (MonoAsyncResult *ares, MonoArray **out_args, MonoObject **exc, MonoError *error)
 {
-       ThreadPool *pools [2];
-       MonoInternalThread *thread;
-       int i;
-
-       guint32 ms;
-       gint8 num_waiting_iterations = 0;
+       MonoAsyncCall *ac;
 
-       gint16 history_size = 0, current = -1;
-       SamplesHistory *history = malloc (sizeof (SamplesHistory) * HISTORY_SIZE);
+       error_init (error);
+       g_assert (exc);
+       g_assert (out_args);
 
-       pools [0] = &async_tp;
-       pools [1] = &async_io_tp;
-       thread = mono_thread_internal_current ();
-       ves_icall_System_Threading_Thread_SetName_internal (thread, mono_string_new (mono_domain_get (), "Threadpool monitor"));
-       while (1) {
-               ms = SAMPLES_PERIOD;
-               i = 10; //number of spurious awakes we tolerate before doing a round of rebalancing.
-               mono_gc_set_skip_thread (TRUE);
-               MONO_PREPARE_BLOCKING
-               do {
-                       guint32 ts;
-                       ts = mono_msec_ticks ();
-                       if (SleepEx (ms, TRUE) == 0)
-                               break;
-                       ms -= (mono_msec_ticks () - ts);
-                       if (mono_runtime_is_shutting_down ())
-                               break;
-                       check_for_interruption_critical ();
-               } while (ms > 0 && i--);
-               MONO_FINISH_BLOCKING
-               mono_gc_set_skip_thread (FALSE);
+       *exc = NULL;
+       *out_args = NULL;
 
-               if (mono_runtime_is_shutting_down ())
-                       break;
+       /* check if already finished */
+       mono_monitor_enter ((MonoObject*) ares);
 
-               if (suspended)
-                       continue;
+       if (ares->endinvoke_called) {
+               mono_error_set_invalid_operation(error, "Delegate EndInvoke method called more than once");
+               mono_monitor_exit ((MonoObject*) ares);
+               return NULL;
+       }
 
-               /* threadpool is cleaning up */
-               if (async_tp.pool_status == 2 || async_io_tp.pool_status == 2)
-                       break;
+       ares->endinvoke_called = 1;
 
-               MONO_PREPARE_BLOCKING
-               switch (monitor_state) {
-               case MONITOR_STATE_AWAKE:
-                       num_waiting_iterations = 0;
-                       break;
-               case MONITOR_STATE_FALLING_ASLEEP:
-                       if (++num_waiting_iterations == NUM_WAITING_ITERATIONS) {
-                               if (monitor_state == MONITOR_STATE_FALLING_ASLEEP && InterlockedCompareExchange (&monitor_state, MONITOR_STATE_SLEEPING, MONITOR_STATE_FALLING_ASLEEP) == MONITOR_STATE_FALLING_ASLEEP) {
-                                       MONO_SEM_WAIT (&monitor_sem);
-
-                                       num_waiting_iterations = 0;
-                                       current = -1;
-                                       history_size = 0;
-                               }
-                       }
-                       break;
-               case MONITOR_STATE_SLEEPING:
-                       g_assert_not_reached ();
-               }
-               MONO_FINISH_BLOCKING
-
-               for (i = 0; i < 2; i++) {
-                       ThreadPool *tp;
-                       tp = pools [i];
-
-                       if (tp->is_io) {
-                               if (!tp->waiting && mono_cq_count (tp->queue) > 0)
-                                       threadpool_start_thread (tp);
-                       } else {
-                               gint8 nthreads_diff = monitor_heuristic (&current, &history_size, history, tp);
-
-                               if (nthreads_diff == 1)
-                                       threadpool_start_thread (tp);
-                               else if (nthreads_diff == -1)
-                                       threadpool_kill_thread (tp);
+       /* wait until we are really finished */
+       if (ares->completed) {
+               mono_monitor_exit ((MonoObject *) ares);
+       } else {
+               gpointer wait_event;
+               if (ares->handle) {
+                       wait_event = mono_wait_handle_get_handle ((MonoWaitHandle*) ares->handle);
+               } else {
+                       wait_event = mono_w32event_create (TRUE, FALSE);
+                       g_assert(wait_event);
+                       MonoWaitHandle *wait_handle = mono_wait_handle_new (mono_object_domain (ares), wait_event, error);
+                       if (!is_ok (error)) {
+                               mono_w32event_close (wait_event);
+                               /* The monitor taken above is still held here: release it before bailing out. */
+                               mono_monitor_exit ((MonoObject*) ares);
+                               return NULL;
                        }
+                       MONO_OBJECT_SETREF (ares, handle, (MonoObject*) wait_handle);
                }
+               /* Drop the monitor before blocking on the event. */
+               mono_monitor_exit ((MonoObject*) ares);
+               MONO_ENTER_GC_SAFE;
+#ifdef HOST_WIN32
+               mono_win32_wait_for_single_object_ex (wait_event, INFINITE, TRUE);
+#else
+               mono_w32handle_wait_one (wait_event, MONO_INFINITE_WAIT, TRUE);
+#endif
+               MONO_EXIT_GC_SAFE;
        }
-}
 
-void
-mono_thread_pool_init_tls (void)
-{
-       if (use_ms_threadpool ()) {
-               mono_threadpool_ms_init_tls ();
-               return;
-       }
+       ac = (MonoAsyncCall*) ares->object_data;
+       g_assert (ac);
 
-       mono_wsq_init ();
+       *exc = ac->msg->exc; /* FIXME: GC add write barrier */
+       *out_args = ac->out_args;
+       return ac->res;
 }
 
-void
-mono_thread_pool_init (void)
+/**
+ * Waits for the thread pool to be done with \p domain, then drops the
+ * per-domain bookkeeping record.
+ *
+ * \param domain the domain being unloaded (asserted to be unloading)
+ * \param timeout maximum wait, added to mono_msec_ticks () to form the
+ * deadline; -1 waits without a deadline
+ * \returns TRUE when the pool is not initialized, when no record exists for
+ * \p domain, or when outstanding requests plus running jobs reached zero;
+ * FALSE when the deadline passed (either after the IO job removal or while
+ * waiting on the cleanup condition).
+ *
+ * NOTE(review): when the wait loop is left with ret == FALSE the record is
+ * still removed and freed below although its counters may be non-zero --
+ * confirm that no worker can reach it afterwards.
+ */
+gboolean
+mono_threadpool_remove_domain_jobs (MonoDomain *domain, int timeout)
 {
-       gint threads_per_cpu = 1;
-       gint thread_count;
-       gint cpu_count;
-       int result;
-       
-       if (use_ms_threadpool ()) {
-               mono_threadpool_ms_init ();
-               return;
-       }
+       gint64 end;
+       ThreadPoolDomain *tpdomain;
+       gboolean ret;
 
-       cpu_count = mono_cpu_count ();
+       g_assert (domain);
+       g_assert (timeout >= -1);
 
-       if (tp_inited == 2)
-               return;
+       g_assert (mono_domain_is_unloading (domain));
 
-       result = InterlockedCompareExchange (&tp_inited, 1, 0);
-       if (result == 1) {
-               while (1) {
-                       SleepEx (1, FALSE);
-                       if (tp_inited == 2)
-                               return;
-               }
-       }
+       /* `end` is only assigned, and only read, when a finite timeout was given. */
+       if (timeout != -1)
+               end = mono_msec_ticks () + timeout;
 
-       MONO_GC_REGISTER_ROOT_FIXED (socket_io_data.sock_to_state);
-       mono_mutex_init_recursive (&socket_io_data.io_lock);
-       if (g_getenv ("MONO_THREADS_PER_CPU") != NULL) {
-               threads_per_cpu = atoi (g_getenv ("MONO_THREADS_PER_CPU"));
-               if (threads_per_cpu < 1)
-                       threads_per_cpu = 1;
+#ifndef DISABLE_SOCKETS
+       mono_threadpool_io_remove_domain_jobs (domain);
+       if (timeout != -1) {
+               if (mono_msec_ticks () > end)
+                       return FALSE;
        }
+#endif
 
-       thread_count = MIN (cpu_count * threads_per_cpu, 100 * cpu_count);
-       threadpool_init (&async_tp, thread_count, MAX (100 * cpu_count, thread_count), async_invoke_thread);
-       threadpool_init (&async_io_tp, cpu_count * 2, cpu_count * 4, async_invoke_thread);
-       async_io_tp.is_io = TRUE;
+       /*
+        * Wait for all threads which execute jobs in the domain to exit.
+        * The is_unloading () check in worker_request () ensures that
+        * no new jobs are added after we enter the lock below.
+        */
 
-       async_call_klass = mono_class_from_name (mono_defaults.corlib, "System", "MonoAsyncCall");
-       g_assert (async_call_klass);
+       if (!mono_lazy_is_initialized (&status))
+               return TRUE;
 
-       mono_mutex_init (&threads_lock);
-       threads = g_ptr_array_sized_new (thread_count);
-       g_assert (threads);
+       mono_refcount_inc (&threadpool);
 
-       mono_mutex_init_recursive (&wsqs_lock);
-       wsqs = g_ptr_array_sized_new (MAX (100 * cpu_count, thread_count));
+       domains_lock ();
 
-#ifndef DISABLE_PERFCOUNTERS
-       async_tp.pc_nitems = init_perf_counter ("Mono Threadpool", "Work Items Added");
-       g_assert (async_tp.pc_nitems);
+       tpdomain = tpdomain_get (domain);
+       if (!tpdomain) {
+               domains_unlock ();
+               mono_refcount_dec (&threadpool);
+               return TRUE;
+       }
 
-       async_io_tp.pc_nitems = init_perf_counter ("Mono Threadpool", "IO Work Items Added");
-       g_assert (async_io_tp.pc_nitems);
+       ret = TRUE;
 
-       async_tp.pc_nthreads = init_perf_counter ("Mono Threadpool", "# of Threads");
-       g_assert (async_tp.pc_nthreads);
+       while (tpdomain->outstanding_request + tpdomain->threadpool_jobs > 0) {
+               if (timeout == -1) {
+                       mono_coop_cond_wait (&tpdomain->cleanup_cond, &threadpool.domains_lock);
+               } else {
+                       gint64 now;
+                       gint res;
 
-       async_io_tp.pc_nthreads = init_perf_counter ("Mono Threadpool", "# of IO Threads");
-       g_assert (async_io_tp.pc_nthreads);
-#endif
-       tp_inited = 2;
-#ifdef DEBUG
-       signal (SIGALRM, signal_handler);
-       alarm (2);
-#endif
+                       now = mono_msec_ticks();
+                       if (now > end) {
+                               ret = FALSE;
+                               break;
+                       }
 
-       MONO_SEM_INIT (&monitor_sem, 0);
-       monitor_state = MONITOR_STATE_AWAKE;
-       monitor_njobs = 0;
-}
-
-static MonoAsyncResult *
-create_simple_asyncresult (MonoObject *target, MonoObject *state)
-{
-       MonoDomain *domain = mono_domain_get ();
-       MonoAsyncResult *ares;
-
-       /* Don't call mono_async_result_new() to avoid capturing the context */
-       ares = (MonoAsyncResult *) mono_object_new (domain, mono_defaults.asyncresult_class);
-       MONO_OBJECT_SETREF (ares, async_delegate, target);
-       MONO_OBJECT_SETREF (ares, async_state, state);
-       return ares;
-}
-
-void
-icall_append_io_job (MonoObject *target, MonoSocketAsyncResult *state)
-{
-       MonoAsyncResult *ares;
-
-       ares = create_simple_asyncresult (target, (MonoObject *) state);
-
-       if (use_ms_threadpool ()) {
-#ifndef DISABLE_SOCKETS
-               mono_threadpool_ms_io_add (ares, state);
-#endif
-               return;
+                       /* Wait only for whatever is left until the deadline. */
+                       res = mono_coop_cond_timedwait (&tpdomain->cleanup_cond, &threadpool.domains_lock, end - now);
+                       if (res != 0) {
+                               ret = FALSE;
+                               break;
+                       }
+               }
        }
 
-       socket_io_add (ares, state);
-}
-
-MonoAsyncResult *
-mono_thread_pool_add (MonoObject *target, MonoMethodMessage *msg, MonoDelegate *async_callback,
-                     MonoObject *state)
-{
-       MonoDomain *domain;
-       MonoAsyncResult *ares;
-       MonoAsyncCall *ac;
-
-       if (use_ms_threadpool ())
-               return mono_threadpool_ms_add (target, msg, async_callback, state);
+       /* Remove from the list the worker threads look at */
+       tpdomain_remove (tpdomain);
 
-       domain = mono_domain_get ();
+       domains_unlock ();
 
-       ac = (MonoAsyncCall*)mono_object_new (domain, async_call_klass);
-       MONO_OBJECT_SETREF (ac, msg, msg);
-       MONO_OBJECT_SETREF (ac, state, state);
-
-       if (async_callback) {
-               ac->cb_method = mono_get_delegate_invoke (((MonoObject *)async_callback)->vtable->klass);
-               MONO_OBJECT_SETREF (ac, cb_target, async_callback);
-       }
+       mono_coop_cond_destroy (&tpdomain->cleanup_cond);
+       tpdomain_free (tpdomain);
 
-       ares = mono_async_result_new (domain, NULL, ac->state, NULL, (MonoObject*)ac);
-       MONO_OBJECT_SETREF (ares, async_delegate, target);
+       mono_refcount_dec (&threadpool);
 
-#ifndef DISABLE_SOCKETS
-       if (socket_io_filter (target, state)) {
-               socket_io_add (ares, (MonoSocketAsyncResult *) state);
-               return ares;
-       }
-#endif
-       threadpool_append_job (&async_tp, (MonoObject *) ares);
-       return ares;
+       return ret;
 }
 
-MonoObject *
-mono_thread_pool_finish (MonoAsyncResult *ares, MonoArray **out_args, MonoObject **exc)
+/**
+ * Marks the worker side of the pool as suspended through
+ * mono_threadpool_worker_set_suspended (TRUE). Does nothing when the pool
+ * has not been lazily initialized yet.
+ */
+void
+mono_threadpool_suspend (void)
 {
-       MonoAsyncCall *ac;
-       HANDLE wait_event;
-
-       if (use_ms_threadpool ()) {
-               return mono_threadpool_ms_finish (ares, out_args, exc);
-       }
-
-       *exc = NULL;
-       *out_args = NULL;
-
-       /* check if already finished */
-       mono_monitor_enter ((MonoObject *) ares);
-       
-       if (ares->endinvoke_called) {
-               *exc = (MonoObject *) mono_get_exception_invalid_operation (NULL);
-               mono_monitor_exit ((MonoObject *) ares);
-               return NULL;
-       }
-
-       ares->endinvoke_called = 1;
-       /* wait until we are really finished */
-       if (!ares->completed) {
-               if (ares->handle == NULL) {
-                       wait_event = CreateEvent (NULL, TRUE, FALSE, NULL);
-                       g_assert(wait_event != 0);
-                       MONO_OBJECT_SETREF (ares, handle, (MonoObject *) mono_wait_handle_new (mono_object_domain (ares), wait_event));
-               } else {
-                       wait_event = mono_wait_handle_get_handle ((MonoWaitHandle *) ares->handle);
-               }
-               mono_monitor_exit ((MonoObject *) ares);
-               MONO_PREPARE_BLOCKING
-               WaitForSingleObjectEx (wait_event, INFINITE, TRUE);
-               MONO_FINISH_BLOCKING
-       } else {
-               mono_monitor_exit ((MonoObject *) ares);
-       }
-
-       ac = (MonoAsyncCall *) ares->object_data;
-       g_assert (ac != NULL);
-       *exc = ac->msg->exc; /* FIXME: GC add write barrier */
-       *out_args = ac->out_args;
-
-       return ac->res;
+       if (mono_lazy_is_initialized (&status))
+               mono_threadpool_worker_set_suspended (TRUE);
 }
 
-static void
-threadpool_kill_idle_threads (ThreadPool *tp)
+/**
+ * Clears the suspended state of the worker side of the pool through
+ * mono_threadpool_worker_set_suspended (FALSE). Does nothing when the pool
+ * has not been lazily initialized yet.
+ */
+void
+mono_threadpool_resume (void)
 {
-       gint n;
-
-       n = (gint) InterlockedCompareExchange (&tp->max_threads, 0, -1);
-       while (n) {
-               n--;
-               MONO_SEM_POST (&tp->new_job);
-       }
+       if (mono_lazy_is_initialized (&status))
+               mono_threadpool_worker_set_suspended (FALSE);
 }
 
+/**
+ * Icall reporting how many pool threads are currently available.
+ *
+ * \param worker_threads receives the worker maximum minus the `working`
+ * count read from the pool counter, clamped at 0
+ * \param completion_port_threads receives threadpool.limit_io_max
+ *
+ * Returns without writing anything when either pointer is NULL. Both outputs
+ * are set to 0 when the pool cannot be initialized or a reference on it
+ * cannot be taken.
+ */
 void
-mono_thread_pool_cleanup (void)
+ves_icall_System_Threading_ThreadPool_GetAvailableThreadsNative (gint32 *worker_threads, gint32 *completion_port_threads)
 {
-       if (use_ms_threadpool ()) {
-               mono_threadpool_ms_cleanup ();
-               return;
-       }
-
-       if (InterlockedExchange (&async_io_tp.pool_status, 2) == 1) {
-               socket_io_cleanup (&socket_io_data); /* Empty when DISABLE_SOCKETS is defined */
-               threadpool_kill_idle_threads (&async_io_tp);
-       }
-
-       if (async_io_tp.queue != NULL) {
-               MONO_SEM_DESTROY (&async_io_tp.new_job);
-               threadpool_free_queue (&async_io_tp);
-       }
-
+       ThreadPoolCounter counter;
 
-       if (InterlockedExchange (&async_tp.pool_status, 2) == 1) {
-               threadpool_kill_idle_threads (&async_tp);
-               threadpool_free_queue (&async_tp);
-       }
-       
-       if (threads) {
-               mono_mutex_lock (&threads_lock);
-               if (threads)
-                       g_ptr_array_free (threads, FALSE);
-               threads = NULL;
-               mono_mutex_unlock (&threads_lock);
-       }
-
-       if (wsqs) {
-               mono_mutex_lock (&wsqs_lock);
-               mono_wsq_cleanup ();
-               if (wsqs)
-                       g_ptr_array_free (wsqs, TRUE);
-               wsqs = NULL;
-               mono_mutex_unlock (&wsqs_lock);
-               MONO_SEM_DESTROY (&async_tp.new_job);
-       }
-
-       MONO_SEM_DESTROY (&monitor_sem);
-}
-
-static gboolean
-threadpool_start_thread (ThreadPool *tp)
-{
-       gint n;
-       guint32 stack_size;
-       MonoInternalThread *thread;
+       if (!worker_threads || !completion_port_threads)
+               return;
 
-       stack_size = (!tp->is_io) ? 0 : SMALL_STACK;
-       while (!mono_runtime_is_shutting_down () && (n = tp->nthreads) < tp->max_threads) {
-               if (InterlockedCompareExchange (&tp->nthreads, n + 1, n) == n) {
-#ifndef DISABLE_PERFCOUNTERS
-                       mono_perfcounter_update_value (tp->pc_nthreads, TRUE, 1);
-#endif
-                       if (tp->is_io) {
-                               thread = mono_thread_create_internal (mono_get_root_domain (), tp->async_invoke, tp, TRUE, stack_size);
-                       } else {
-                               mono_mutex_lock (&threads_lock);
-                               thread = mono_thread_create_internal (mono_get_root_domain (), tp->async_invoke, tp, TRUE, stack_size);
-                               g_assert (threads != NULL);
-                               g_ptr_array_add (threads, thread);
-                               mono_mutex_unlock (&threads_lock);
-                       }
-                       return TRUE;
-               }
+       if (!mono_lazy_initialize (&status, initialize) || !mono_refcount_tryinc (&threadpool)) {
+               *worker_threads = 0;
+               *completion_port_threads = 0;
+               return;
        }
 
-       return FALSE;
-}
+       counter = COUNTER_READ ();
 
-static void
-pulse_on_new_job (ThreadPool *tp)
-{
-       if (tp->waiting)
-               MONO_SEM_POST (&tp->new_job);
-}
-
-static void
-threadpool_kill_thread (ThreadPool *tp)
-{
-       if (tp->destroy_thread == 0 && InterlockedCompareExchange (&tp->destroy_thread, 1, 0) == 0)
-               pulse_on_new_job (tp);
-}
-
-void
-icall_append_job (MonoObject *ar)
-{
-       threadpool_append_jobs (&async_tp, &ar, 1);
-}
+       *worker_threads = MAX (0, mono_threadpool_worker_get_max () - counter._.working);
+       *completion_port_threads = threadpool.limit_io_max;
 
-static void
-threadpool_append_job (ThreadPool *tp, MonoObject *ar)
-{
-       threadpool_append_jobs (tp, &ar, 1);
+       mono_refcount_dec (&threadpool);
 }
 
+/**
+ * Icall reporting the configured minimum thread counts.
+ *
+ * \param worker_threads receives mono_threadpool_worker_get_min ()
+ * \param completion_port_threads receives threadpool.limit_io_min
+ *
+ * Returns without writing anything when either pointer is NULL. Both outputs
+ * are set to 0 when the pool cannot be initialized or a reference on it
+ * cannot be taken.
+ */
 void
-threadpool_append_async_io_jobs (MonoObject **jobs, gint njobs)
-{
-       threadpool_append_jobs (&async_io_tp, jobs, njobs);
-}
-
-static void
-threadpool_append_jobs (ThreadPool *tp, MonoObject **jobs, gint njobs)
+ves_icall_System_Threading_ThreadPool_GetMinThreadsNative (gint32 *worker_threads, gint32 *completion_port_threads)
 {
-       MonoObject *ar;
-       gint i;
-
-       if (mono_runtime_is_shutting_down ())
+       if (!worker_threads || !completion_port_threads)
                return;
 
-       if (tp->pool_status == 0 && InterlockedCompareExchange (&tp->pool_status, 1, 0) == 0) {
-               if (!tp->is_io) {
-                       monitor_internal_thread = mono_thread_create_internal (mono_get_root_domain (), monitor_thread, NULL, TRUE, SMALL_STACK);
-                       monitor_internal_thread->flags |= MONO_THREAD_FLAG_DONT_MANAGE;
-                       threadpool_start_thread (tp);
-               }
-               /* Create on demand up to min_threads to avoid startup penalty for apps that don't use
-                * the threadpool that much
-                */
-               if (mono_config_is_server_mode ()) {
-                       mono_thread_create_internal (mono_get_root_domain (), threadpool_start_idle_threads, tp, TRUE, SMALL_STACK);
-               }
-       }
-
-       InterlockedAdd (&monitor_njobs, njobs);
-
-       if (monitor_state == MONITOR_STATE_SLEEPING && InterlockedCompareExchange (&monitor_state, MONITOR_STATE_AWAKE, MONITOR_STATE_SLEEPING) == MONITOR_STATE_SLEEPING)
-               MONO_SEM_POST (&monitor_sem);
-
-       if (monitor_state == MONITOR_STATE_FALLING_ASLEEP)
-               InterlockedCompareExchange (&monitor_state, MONITOR_STATE_AWAKE, MONITOR_STATE_FALLING_ASLEEP);
-
-       for (i = 0; i < njobs; i++) {
-               ar = jobs [i];
-               if (ar == NULL || mono_domain_is_unloading (ar->vtable->domain))
-                       continue; /* Might happen when cleaning domain jobs */
-               threadpool_jobs_inc (ar); 
-#ifndef DISABLE_PERFCOUNTERS
-               mono_perfcounter_update_value (tp->pc_nitems, TRUE, 1);
-#endif
-               if (!tp->is_io && mono_wsq_local_push (ar))
-                       continue;
-
-               mono_cq_enqueue (tp->queue, ar);
+       if (!mono_lazy_initialize (&status, initialize) || !mono_refcount_tryinc (&threadpool)) {
+               *worker_threads = 0;
+               *completion_port_threads = 0;
+               return;
        }
 
-#if DEBUG
-       InterlockedAdd (&tp->njobs, njobs);
-#endif
+       *worker_threads = mono_threadpool_worker_get_min ();
+       *completion_port_threads = threadpool.limit_io_min;
 
-       for (i = 0; tp->waiting > 0 && i < MIN(njobs, tp->max_threads); i++)
-               pulse_on_new_job (tp);
+       mono_refcount_dec (&threadpool);
 }
 
-static void
-threadpool_clear_queue (ThreadPool *tp, MonoDomain *domain)
+/**
+ * Icall reporting the configured maximum thread counts.
+ *
+ * \param worker_threads receives mono_threadpool_worker_get_max ()
+ * \param completion_port_threads receives threadpool.limit_io_max
+ *
+ * Returns without writing anything when either pointer is NULL. Both outputs
+ * are set to 0 when the pool cannot be initialized or a reference on it
+ * cannot be taken.
+ */
+void
+ves_icall_System_Threading_ThreadPool_GetMaxThreadsNative (gint32 *worker_threads, gint32 *completion_port_threads)
 {
-       MonoObject *obj;
-       MonoMList *other = NULL;
-       MonoCQ *queue = tp->queue;
-
-       if (!queue)
+       if (!worker_threads || !completion_port_threads)
                return;
 
-       while (mono_cq_dequeue (queue, &obj)) {
-               if (obj == NULL)
-                       continue;
-               if (obj->vtable->domain != domain)
-                       other = mono_mlist_prepend (other, obj);
-               threadpool_jobs_dec (obj);
-       }
-
-       if (mono_runtime_is_shutting_down ())
+       if (!mono_lazy_initialize (&status, initialize) || !mono_refcount_tryinc (&threadpool)) {
+               *worker_threads = 0;
+               *completion_port_threads = 0;
                return;
-
-       while (other) {
-               threadpool_append_job (tp, (MonoObject *) mono_mlist_get_data (other));
-               other = mono_mlist_next (other);
        }
-}
 
-static gboolean
-remove_sockstate_for_domain (gpointer key, gpointer value, gpointer user_data)
-{
-       MonoMList *list = value;
-       gboolean remove = FALSE;
-       while (list) {
-               MonoObject *data = mono_mlist_get_data (list);
-               if (mono_object_domain (data) == user_data) {
-                       remove = TRUE;
-                       mono_mlist_set_data (list, NULL);
-               }
-               list = mono_mlist_next (list);
-       }
-       //FIXME is there some sort of additional unregistration we need to perform here?
-       return remove;
+       *worker_threads = mono_threadpool_worker_get_max ();
+       *completion_port_threads = threadpool.limit_io_max;
+
+       mono_refcount_dec (&threadpool);
 }
 
-/*
- * Clean up the threadpool of all domain jobs.
- * Can only be called as part of the domain unloading process as
- * it will wait for all jobs to be visible to the interruption code. 
- */
-gboolean
-mono_thread_pool_remove_domain_jobs (MonoDomain *domain, int timeout)
+MonoBoolean
+ves_icall_System_Threading_ThreadPool_SetMinThreadsNative (gint32 worker_threads, gint32 completion_port_threads)
 {
-       HANDLE sem_handle;
-       int result;
-       guint32 start_time;
-
-       if (use_ms_threadpool ()) {
-               return mono_threadpool_ms_remove_domain_jobs (domain, timeout);
-       }
-
-       result = TRUE;
-       start_time = 0;
-
-       g_assert (domain->state == MONO_APPDOMAIN_UNLOADING);
-
-       threadpool_clear_queue (&async_tp, domain);
-       threadpool_clear_queue (&async_io_tp, domain);
-
-       mono_mutex_lock (&socket_io_data.io_lock);
-       if (socket_io_data.sock_to_state)
-               mono_g_hash_table_foreach_remove (socket_io_data.sock_to_state, remove_sockstate_for_domain, domain);
+       if (completion_port_threads <= 0 || completion_port_threads > threadpool.limit_io_max)
+               return FALSE;
 
-       mono_mutex_unlock (&socket_io_data.io_lock);
-       
-       /*
-        * There might be some threads out that could be about to execute stuff from the given domain.
-        * We avoid that by setting up a semaphore to be pulsed by the thread that reaches zero.
-        */
-       sem_handle = CreateSemaphore (NULL, 0, 1, NULL);
+       if (!mono_lazy_initialize (&status, initialize) || !mono_refcount_tryinc (&threadpool))
+               return FALSE;
 
-       domain->cleanup_semaphore = sem_handle;
-       /*
-        * The memory barrier here is required to have global ordering between assigning to cleanup_semaphone
-        * and reading threadpool_jobs.
-        * Otherwise this thread could read a stale version of threadpool_jobs and wait forever.
-        */
-       mono_memory_write_barrier ();
-
-       if (domain->threadpool_jobs && timeout != -1)
-               start_time = mono_msec_ticks ();
-       while (domain->threadpool_jobs) {
-               MONO_PREPARE_BLOCKING
-               WaitForSingleObject (sem_handle, timeout);
-               MONO_FINISH_BLOCKING
-               if (timeout != -1 && (mono_msec_ticks () - start_time) > timeout) {
-                       result = FALSE;
-                       break;
-               }
+       if (!mono_threadpool_worker_set_min (worker_threads)) {
+               mono_refcount_dec (&threadpool);
+               return FALSE;
        }
 
-       domain->cleanup_semaphore = NULL;
-       CloseHandle (sem_handle);
-       return result;
-}
+       threadpool.limit_io_min = completion_port_threads;
 
-static void
-threadpool_free_queue (ThreadPool *tp)
-{
-       mono_cq_destroy (tp->queue);
-       tp->queue = NULL;
+       mono_refcount_dec (&threadpool);
+       return TRUE;
 }
 
-gboolean
-mono_thread_pool_is_queue_array (MonoArray *o)
+MonoBoolean
+ves_icall_System_Threading_ThreadPool_SetMaxThreadsNative (gint32 worker_threads, gint32 completion_port_threads)
 {
-       if (use_ms_threadpool ()) {
-               return mono_threadpool_ms_is_queue_array (o);
-       }
+       gint cpu_count = mono_cpu_count ();
 
-       // gpointer obj = o;
-
-       // FIXME: need some fix in sgen code.
-       return FALSE;
-}
+       if (completion_port_threads < threadpool.limit_io_min || completion_port_threads < cpu_count)
+               return FALSE;
 
-static MonoWSQ *
-add_wsq (void)
-{
-       int i;
-       MonoWSQ *wsq;
+       if (!mono_lazy_initialize (&status, initialize) || !mono_refcount_tryinc (&threadpool))
+               return FALSE;
 
-       mono_mutex_lock (&wsqs_lock);
-       wsq = mono_wsq_create ();
-       if (wsqs == NULL) {
-               mono_mutex_unlock (&wsqs_lock);
-               return NULL;
-       }
-       for (i = 0; i < wsqs->len; i++) {
-               if (g_ptr_array_index (wsqs, i) == NULL) {
-                       wsqs->pdata [i] = wsq;
-                       mono_mutex_unlock (&wsqs_lock);
-                       return wsq;
-               }
+       if (!mono_threadpool_worker_set_max (worker_threads)) {
+               mono_refcount_dec (&threadpool);
+               return FALSE;
        }
-       g_ptr_array_add (wsqs, wsq);
-       mono_mutex_unlock (&wsqs_lock);
-       return wsq;
-}
 
-static void
-remove_wsq (MonoWSQ *wsq)
-{
-       gpointer data;
-
-       if (wsq == NULL)
-               return;
+       threadpool.limit_io_max = completion_port_threads;
 
-       mono_mutex_lock (&wsqs_lock);
-       if (wsqs == NULL) {
-               mono_mutex_unlock (&wsqs_lock);
-               return;
-       }
-       g_ptr_array_remove_fast (wsqs, wsq);
-       data = NULL;
-       /*
-        * Only clean this up when shutting down, any other case will error out
-        * if we're removing a queue that still has work items.
-        */
-       if (mono_runtime_is_shutting_down ()) {
-               while (mono_wsq_local_pop (&data)) {
-                       threadpool_jobs_dec (data);
-                       data = NULL;
-               }
-       }
-       mono_wsq_destroy (wsq);
-       mono_mutex_unlock (&wsqs_lock);
+       mono_refcount_dec (&threadpool);
+       return TRUE;
 }
 
-static void
-try_steal (MonoWSQ *local_wsq, gpointer *data, gboolean retry)
+void
+ves_icall_System_Threading_ThreadPool_InitializeVMTp (MonoBoolean *enable_worker_tracking)
 {
-       int i;
-       int ms;
-
-       if (wsqs == NULL || data == NULL || *data != NULL)
-               return;
-
-       ms = 0;
-       do {
-               if (mono_runtime_is_shutting_down ())
-                       return;
-
-               MONO_PREPARE_BLOCKING
-               mono_mutex_lock (&wsqs_lock);
-               MONO_FINISH_BLOCKING
-               for (i = 0; wsqs != NULL && i < wsqs->len; i++) {
-                       MonoWSQ *wsq;
+       if (enable_worker_tracking) {
+               // TODO implement some kind of switch to have the possibility to use it
+               *enable_worker_tracking = FALSE;
+       }
 
-                       wsq = wsqs->pdata [i];
-                       if (wsq == local_wsq || mono_wsq_count (wsq) == 0)
-                               continue;
-                       mono_wsq_try_steal (wsqs->pdata [i], data, ms);
-                       if (*data != NULL) {
-                               mono_mutex_unlock (&wsqs_lock);
-                               return;
-                       }
-               }
-               mono_mutex_unlock (&wsqs_lock);
-               ms += 10;
-       } while (retry && ms < 11);
+       mono_lazy_initialize (&status, initialize);
 }
 
-static gboolean
-dequeue_or_steal (ThreadPool *tp, gpointer *data, MonoWSQ *local_wsq)
+MonoBoolean
+ves_icall_System_Threading_ThreadPool_NotifyWorkItemComplete (void)
 {
-       MonoCQ *queue = tp->queue;
-       if (mono_runtime_is_shutting_down () || !queue)
+       if (mono_domain_is_unloading (mono_domain_get ()) || mono_runtime_is_shutting_down ())
                return FALSE;
-       mono_cq_dequeue (queue, (MonoObject **) data);
-       if (!tp->is_io && !*data)
-               try_steal (local_wsq, data, FALSE);
-       return (*data != NULL);
-}
-
-static gboolean
-should_i_die (ThreadPool *tp)
-{
-       gboolean result = FALSE;
-       if (tp->destroy_thread == 1 && InterlockedCompareExchange (&tp->destroy_thread, 0, 1) == 1)
-               result = (tp->nthreads > tp->min_threads);
-       return result;
-}
-
-static void
-set_tp_thread_info (ThreadPool *tp)
-{
-       const gchar *name;
-       MonoInternalThread *thread = mono_thread_internal_current ();
 
-       mono_profiler_thread_start (thread->tid);
-       name = (tp->is_io) ? "IO Threadpool worker" : "Threadpool worker";
-       mono_thread_set_name_internal (thread, mono_string_new (mono_domain_get (), name), FALSE);
-}
-
-static void
-clear_thread_state (void)
-{
-       MonoInternalThread *thread = mono_thread_internal_current ();
-       /* If the callee changes the background status, set it back to TRUE */
-       mono_thread_clr_state (thread , ~ThreadState_Background);
-       if (!mono_thread_test_state (thread , ThreadState_Background))
-               ves_icall_System_Threading_Thread_SetState (thread, ThreadState_Background);
+       return mono_threadpool_worker_notify_completed ();
 }
 
 void
-check_for_interruption_critical (void)
+ves_icall_System_Threading_ThreadPool_NotifyWorkItemProgressNative (void)
 {
-       MonoInternalThread *thread;
-       /*RULE NUMBER ONE OF SKIP_THREAD: NEVER POKE MANAGED STATE.*/
-       mono_gc_set_skip_thread (FALSE);
-
-       thread = mono_thread_internal_current ();
-       if (THREAD_WANTS_A_BREAK (thread))
-               mono_thread_interruption_checkpoint ();
-
-       /*RULE NUMBER TWO OF SKIP_THREAD: READ RULE NUMBER ONE.*/
-       mono_gc_set_skip_thread (TRUE);
+       mono_threadpool_worker_notify_completed ();
 }
 
-static void
-fire_profiler_thread_end (void)
+void
+ves_icall_System_Threading_ThreadPool_ReportThreadStatus (MonoBoolean is_working)
 {
-       MonoInternalThread *thread = mono_thread_internal_current ();
-       mono_profiler_thread_end (thread->tid);
+       // TODO
+       MonoError error;
+       mono_error_set_not_implemented (&error, "");
+       mono_error_set_pending_exception (&error);
 }
 
-static void
-async_invoke_thread (gpointer data)
+MonoBoolean
+ves_icall_System_Threading_ThreadPool_RequestWorkerThread (void)
 {
        MonoDomain *domain;
-       MonoWSQ *wsq;
-       ThreadPool *tp;
-       gboolean must_die;
-  
-       tp = data;
-       wsq = NULL;
-       if (!tp->is_io)
-               wsq = add_wsq ();
-
-       set_tp_thread_info (tp);
-
-       if (tp_start_func)
-               tp_start_func (tp_hooks_user_data);
-
-       data = NULL;
-       for (;;) {
-               MonoAsyncResult *ar;
-               MonoClass *klass;
-               gboolean is_io_task;
-               gboolean is_socket;
-               int n_naps = 0;
-
-               is_io_task = FALSE;
-               ar = (MonoAsyncResult *) data;
-               if (ar) {
-                       InterlockedIncrement (&tp->busy_threads);
-                       domain = ((MonoObject *)ar)->vtable->domain;
-#ifndef DISABLE_SOCKETS
-                       klass = ((MonoObject *) data)->vtable->klass;
-                       is_io_task = !is_corlib_asyncresult (domain, klass);
-                       is_socket = FALSE;
-                       if (is_io_task) {
-                               MonoSocketAsyncResult *state = (MonoSocketAsyncResult *) data;
-                               is_socket = is_socketasyncresult (domain, klass);
-                               ar = state->ares;
-                               switch (state->operation) {
-                               case AIO_OP_RECEIVE:
-                                       state->total = ICALL_RECV (state);
-                                       break;
-                               case AIO_OP_SEND:
-                                       state->total = ICALL_SEND (state);
-                                       break;
-                               }
-                       }
-#endif
-                       /* worker threads invokes methods in different domains,
-                        * so we need to set the right domain here */
-                       g_assert (domain);
-
-                       if (mono_domain_is_unloading (domain) || mono_runtime_is_shutting_down ()) {
-                               threadpool_jobs_dec ((MonoObject *)ar);
-                               data = NULL;
-                               ar = NULL;
-                               InterlockedDecrement (&tp->busy_threads);
-                       } else {
-                               mono_thread_push_appdomain_ref (domain);
-                               if (threadpool_jobs_dec ((MonoObject *)ar)) {
-                                       data = NULL;
-                                       ar = NULL;
-                                       mono_thread_pop_appdomain_ref ();
-                                       InterlockedDecrement (&tp->busy_threads);
-                                       continue;
-                               }
-
-                               if (mono_domain_set (domain, FALSE)) {
-                                       MonoObject *exc;
-
-                                       if (tp_item_begin_func)
-                                               tp_item_begin_func (tp_item_user_data);
-
-                                       exc = mono_async_invoke (tp, ar);
-                                       if (tp_item_end_func)
-                                               tp_item_end_func (tp_item_user_data);
-                                       if (exc)
-                                               mono_internal_thread_unhandled_exception (exc);
-                                       if (is_socket && tp->is_io) {
-                                               MonoSocketAsyncResult *state = (MonoSocketAsyncResult *) data;
-
-                                               if (state->completed && state->callback) {
-                                                       MonoAsyncResult *cb_ares;
-                                                       cb_ares = create_simple_asyncresult ((MonoObject *) state->callback,
-                                                                                               (MonoObject *) state);
-                                                       icall_append_job ((MonoObject *) cb_ares);
-                                               }
-                                       }
-                                       mono_domain_set (mono_get_root_domain (), TRUE);
-                               }
-                               mono_thread_pop_appdomain_ref ();
-                               InterlockedDecrement (&tp->busy_threads);
-                               clear_thread_state ();
-                       }
-               }
-
-               ar = NULL;
-               data = NULL;
-               must_die = should_i_die (tp);
-               if (must_die) {
-                       mono_wsq_suspend (wsq);
-               } else {
-                       if (tp->is_io || !mono_wsq_local_pop (&data))
-                               dequeue_or_steal (tp, &data, wsq);
-               }
-
-               n_naps = 0;
-               while (!must_die && !data && n_naps < 4) {
-                       gboolean res;
-
-                       InterlockedIncrement (&tp->waiting);
-
-                       // Another thread may have added a job into its wsq since the last call to dequeue_or_steal
-                       // Check all the queues again before entering the wait loop
-                       dequeue_or_steal (tp, &data, wsq);
-                       if (data) {
-                               InterlockedDecrement (&tp->waiting);
-                               break;
-                       }
-
-                       mono_gc_set_skip_thread (TRUE);
-                       MONO_PREPARE_BLOCKING
+       ThreadPoolDomain *tpdomain;
+       ThreadPoolCounter counter;
 
-#if defined(__OpenBSD__)
-                       while (mono_cq_count (tp->queue) == 0 && (res = mono_sem_wait (&tp->new_job, TRUE)) == -1) {// && errno == EINTR) {
-#else
-                       while (mono_cq_count (tp->queue) == 0 && (res = mono_sem_timedwait (&tp->new_job, 2000, TRUE)) == -1) {// && errno == EINTR) {
-#endif
-                               if (mono_runtime_is_shutting_down ())
-                                       break;
-                               check_for_interruption_critical ();
-                       }
-                       InterlockedDecrement (&tp->waiting);
+       domain = mono_domain_get ();
+       if (mono_domain_is_unloading (domain))
+               return FALSE;
 
-                       MONO_FINISH_BLOCKING
-                       mono_gc_set_skip_thread (FALSE);
+       if (!mono_lazy_initialize (&status, initialize) || !mono_refcount_tryinc (&threadpool)) {
+               /* threadpool has been destroyed, we are shutting down */
+               return FALSE;
+       }
 
-                       if (mono_runtime_is_shutting_down ())
-                               break;
-                       must_die = should_i_die (tp);
-                       dequeue_or_steal (tp, &data, wsq);
-                       n_naps++;
-               }
+       domains_lock ();
 
-               if (!data && !tp->is_io && !mono_runtime_is_shutting_down ()) {
-                       mono_wsq_local_pop (&data);
-                       if (data && must_die) {
-                               InterlockedCompareExchange (&tp->destroy_thread, 1, 0);
-                               pulse_on_new_job (tp);
-                       }
+       tpdomain = tpdomain_get (domain);
+       if (!tpdomain) {
+               /* synchronize with mono_threadpool_remove_domain_jobs */
+               if (mono_domain_is_unloading (domain)) {
+                       domains_unlock ();
+                       mono_refcount_dec (&threadpool);
+                       return FALSE;
                }
 
-               if (!data) {
-                       gint nt;
-                       gboolean down;
-                       while (1) {
-                               nt = tp->nthreads;
-                               down = mono_runtime_is_shutting_down ();
-                               if (!down && nt <= tp->min_threads)
-                                       break;
-                               if (down || InterlockedCompareExchange (&tp->nthreads, nt - 1, nt) == nt) {
-#ifndef DISABLE_PERFCOUNTERS
-                                       mono_perfcounter_update_value (tp->pc_nthreads, TRUE, -1);
-#endif
-                                       if (!tp->is_io) {
-                                               remove_wsq (wsq);
-                                       }
-
-                                       fire_profiler_thread_end ();
-
-                                       if (tp_finish_func)
-                                               tp_finish_func (tp_hooks_user_data);
-
-                                       if (!tp->is_io) {
-                                               if (threads) {
-                                                       mono_mutex_lock (&threads_lock);
-                                                       if (threads)
-                                                               g_ptr_array_remove_fast (threads, mono_thread_current ()->internal_thread);
-                                                       mono_mutex_unlock (&threads_lock);
-                                               }
-                                       }
-
-                                       return;
-                               }
-                       }
-               }
+               tpdomain = tpdomain_create (domain);
        }
 
-       g_assert_not_reached ();
-}
-
-void
-ves_icall_System_Threading_ThreadPool_GetAvailableThreads (gint *workerThreads, gint *completionPortThreads)
-{
-       *workerThreads = async_tp.max_threads - async_tp.busy_threads;
-       *completionPortThreads = async_io_tp.max_threads - async_io_tp.busy_threads;
-}
-
-void
-ves_icall_System_Threading_ThreadPool_GetMaxThreads (gint *workerThreads, gint *completionPortThreads)
-{
-       *workerThreads = async_tp.max_threads;
-       *completionPortThreads = async_io_tp.max_threads;
-}
-
-void
-ves_icall_System_Threading_ThreadPool_GetMinThreads (gint *workerThreads, gint *completionPortThreads)
-{
-       *workerThreads = async_tp.min_threads;
-       *completionPortThreads = async_io_tp.min_threads;
-}
-
-MonoBoolean
-ves_icall_System_Threading_ThreadPool_SetMinThreads (gint workerThreads, gint completionPortThreads)
-{
-       gint max_threads;
-       gint max_io_threads;
-
-       max_threads = async_tp.max_threads;
-       if (workerThreads <= 0 || workerThreads > max_threads)
-               return FALSE;
+       g_assert (tpdomain);
 
-       max_io_threads = async_io_tp.max_threads;
-       if (completionPortThreads <= 0 || completionPortThreads > max_io_threads)
-               return FALSE;
+       tpdomain->outstanding_request ++;
+       g_assert (tpdomain->outstanding_request >= 1);
 
-       InterlockedExchange (&async_tp.min_threads, workerThreads);
-       InterlockedExchange (&async_io_tp.min_threads, completionPortThreads);
-       if (workerThreads > async_tp.nthreads)
-               mono_thread_create_internal (mono_get_root_domain (), threadpool_start_idle_threads, &async_tp, TRUE, SMALL_STACK);
-       if (completionPortThreads > async_io_tp.nthreads)
-               mono_thread_create_internal (mono_get_root_domain (), threadpool_start_idle_threads, &async_io_tp, TRUE, SMALL_STACK);
-       return TRUE;
-}
+       domains_unlock ();
 
-MonoBoolean
-ves_icall_System_Threading_ThreadPool_SetMaxThreads (gint workerThreads, gint completionPortThreads)
-{
-       gint min_threads;
-       gint min_io_threads;
-       gint cpu_count;
+       COUNTER_ATOMIC (counter, {
+               if (counter._.starting == 16) {
+                       mono_refcount_dec (&threadpool);
+                       return TRUE;
+               }
 
-       cpu_count = mono_cpu_count ();
-       min_threads = async_tp.min_threads;
-       if (workerThreads < min_threads || workerThreads < cpu_count)
-               return FALSE;
+               counter._.starting ++;
+       });
 
-       /* We don't really have the concept of completion ports. Do we care here? */
-       min_io_threads = async_io_tp.min_threads;
-       if (completionPortThreads < min_io_threads || completionPortThreads < cpu_count)
-               return FALSE;
+       mono_threadpool_worker_request ();
 
-       InterlockedExchange (&async_tp.max_threads, workerThreads);
-       InterlockedExchange (&async_io_tp.max_threads, completionPortThreads);
+       mono_refcount_dec (&threadpool);
        return TRUE;
 }
 
-/**
- * mono_install_threadpool_thread_hooks
- * @start_func: the function to be called right after a new threadpool thread is created. Can be NULL.
- * @finish_func: the function to be called right before a thredpool thread is exiting. Can be NULL.
- * @user_data: argument passed to @start_func and @finish_func.
- *
- * @start_fun will be called right after a threadpool thread is created and @finish_func right before a threadpool thread exits.
- * The calls will be made from the thread itself.
- */
-void
-mono_install_threadpool_thread_hooks (MonoThreadPoolFunc start_func, MonoThreadPoolFunc finish_func, gpointer user_data)
-{
-       tp_start_func = start_func;
-       tp_finish_func = finish_func;
-       tp_hooks_user_data = user_data;
-}
-
-/**
- * mono_install_threadpool_item_hooks
- * @begin_func: the function to be called before a threadpool work item processing starts.
- * @end_func: the function to be called after a threadpool work item is finished.
- * @user_data: argument passed to @begin_func and @end_func.
- *
- * The calls will be made from the thread itself and from the same AppDomain
- * where the work item was executed.
- *
- */
-void
-mono_install_threadpool_item_hooks (MonoThreadPoolItemFunc begin_func, MonoThreadPoolItemFunc end_func, gpointer user_data)
-{
-       tp_item_begin_func = begin_func;
-       tp_item_end_func = end_func;
-       tp_item_user_data = user_data;
-}
-
-void
-mono_internal_thread_unhandled_exception (MonoObject* exc)
+MonoBoolean G_GNUC_UNUSED
+ves_icall_System_Threading_ThreadPool_PostQueuedCompletionStatus (MonoNativeOverlapped *native_overlapped)
 {
-       if (mono_runtime_unhandled_exception_policy_get () == MONO_UNHANDLED_POLICY_CURRENT) {
-               gboolean unloaded;
-               MonoClass *klass;
-
-               klass = exc->vtable->klass;
-               unloaded = is_appdomainunloaded_exception (exc->vtable->domain, klass);
-               if (!unloaded && klass != mono_defaults.threadabortexception_class) {
-                       mono_unhandled_exception (exc);
-                       if (mono_environment_exitcode_get () == 1)
-                               exit (255);
-               }
-               if (klass == mono_defaults.threadabortexception_class)
-                mono_thread_internal_reset_abort (mono_thread_internal_current ());
-       }
+       /* This copies the behavior of the current Mono implementation */
+       MonoError error;
+       mono_error_set_not_implemented (&error, "");
+       mono_error_set_pending_exception (&error);
+       return FALSE;
 }
 
-/*
- * Suspend creation of new threads.
- */
-void
-mono_thread_pool_suspend (void)
+MonoBoolean G_GNUC_UNUSED
+ves_icall_System_Threading_ThreadPool_BindIOCompletionCallbackNative (gpointer file_handle)
 {
-       if (use_ms_threadpool ()) {
-               mono_threadpool_ms_suspend ();
-               return;
-       }
-       suspended = TRUE;
+       /* This copies the behavior of the current Mono implementation */
+       return TRUE;
 }
 
-/*
- * Resume creation of new threads.
- */
-void
-mono_thread_pool_resume (void)
+MonoBoolean G_GNUC_UNUSED
+ves_icall_System_Threading_ThreadPool_IsThreadPoolHosted (void)
 {
-       if (use_ms_threadpool ()) {
-               mono_threadpool_ms_resume ();
-               return;
-       }
-       suspended = FALSE;
+       return FALSE;
 }