[threadpool-ms] Fix race condition on domain unload (#3592)
authorLudovic Henry <ludovic@xamarin.com>
Wed, 21 Sep 2016 09:16:35 +0000 (11:16 +0200)
committerGitHub <noreply@github.com>
Wed, 21 Sep 2016 09:16:35 +0000 (11:16 +0200)
This race condition has been introduced with 348d793ebb716012326b113fcdd7d4c855ffb01e. This is because we would wait on the cleanup_semaphore outside of the threadpool->domains_lock lock.

By using a cond variable instead of a semaphore, we avoid a potential deadlock.

mono/metadata/threadpool-ms.c

index 462fd69a77f172521b3ff983aae71160b2099aa4..7849d4554f5646220aa302bcc00ae013dfcd837d 100644 (file)
@@ -162,7 +162,7 @@ typedef struct {
 
 typedef struct {
        gint32 ref;
-       MonoCoopSem sem;
+       MonoCoopCond cond;
 } ThreadPoolDomainCleanupSemaphore;
 
 typedef enum {
@@ -439,7 +439,7 @@ domain_get (MonoDomain *domain, gboolean create)
                ThreadPoolDomainCleanupSemaphore *cleanup_semaphore;
                cleanup_semaphore = g_new0 (ThreadPoolDomainCleanupSemaphore, 1);
                cleanup_semaphore->ref = 2;
-               mono_coop_sem_init (&cleanup_semaphore->sem, 0);
+               mono_coop_cond_init (&cleanup_semaphore->cond);
 
                g_assert(!domain->cleanup_semaphore);
                domain->cleanup_semaphore = cleanup_semaphore;
@@ -702,11 +702,12 @@ worker_thread (gpointer data)
                        g_assert (removed);
 
                        cleanup_semaphore = (ThreadPoolDomainCleanupSemaphore*) tpdomain->domain->cleanup_semaphore;
+                       g_assert (cleanup_semaphore);
+
+                       mono_coop_cond_signal (&cleanup_semaphore->cond);
 
-                       g_assert(cleanup_semaphore);
-                       mono_coop_sem_post (&cleanup_semaphore->sem);
                        if (InterlockedDecrement (&cleanup_semaphore->ref) == 0) {
-                               mono_coop_sem_destroy (&cleanup_semaphore->sem);
+                               mono_coop_cond_destroy (&cleanup_semaphore->cond);
                                g_free (cleanup_semaphore);
                                tpdomain->domain->cleanup_semaphore = NULL;
                        }
@@ -1453,10 +1454,10 @@ mono_threadpool_ms_end_invoke (MonoAsyncResult *ares, MonoArray **out_args, Mono
 gboolean
 mono_threadpool_ms_remove_domain_jobs (MonoDomain *domain, int timeout)
 {
-       gint res;
-       gint64 now, end;
+       gint64 end;
        ThreadPoolDomain *tpdomain;
        ThreadPoolDomainCleanupSemaphore *cleanup_semaphore;
+       gboolean ret;
 
        g_assert (domain);
        g_assert (timeout >= -1);
@@ -1493,29 +1494,41 @@ mono_threadpool_ms_remove_domain_jobs (MonoDomain *domain, int timeout)
                return TRUE;
        }
 
-       mono_coop_mutex_unlock(&threadpool->domains_lock);
-
        g_assert (domain->cleanup_semaphore);
-
        cleanup_semaphore = (ThreadPoolDomainCleanupSemaphore*) domain->cleanup_semaphore;
 
-       if (timeout == -1) {
-               res = mono_coop_sem_wait (&cleanup_semaphore->sem, MONO_SEM_FLAGS_NONE);
-               g_assert (res == MONO_SEM_TIMEDWAIT_RET_SUCCESS);
-       } else {
-               now = mono_msec_ticks();
-               if (now > end)
-                       return FALSE;
-               res = mono_coop_sem_timedwait (&cleanup_semaphore->sem, end - now, MONO_SEM_FLAGS_NONE);
-       }
+       ret = TRUE;
+
+       do {
+               if (timeout == -1) {
+                       mono_coop_cond_wait (&cleanup_semaphore->cond, &threadpool->domains_lock);
+               } else {
+                       gint64 now;
+                       gint res;
+
+                       now = mono_msec_ticks();
+                       if (now > end) {
+                               ret = FALSE;
+                               break;
+                       }
+
+                       res = mono_coop_cond_timedwait (&cleanup_semaphore->cond, &threadpool->domains_lock, end - now);
+                       if (res != 0) {
+                               ret = FALSE;
+                               break;
+                       }
+               }
+       } while (tpdomain->outstanding_request != 0);
 
        if (InterlockedDecrement (&cleanup_semaphore->ref) == 0) {
-               mono_coop_sem_destroy (&cleanup_semaphore->sem);
+               mono_coop_cond_destroy (&cleanup_semaphore->cond);
                g_free (cleanup_semaphore);
                domain->cleanup_semaphore = NULL;
        }
 
-       return res == MONO_SEM_TIMEDWAIT_RET_SUCCESS;
+       mono_coop_mutex_unlock(&threadpool->domains_lock);
+
+       return ret;
 }
 
 void