[profiler] Require an ack of the previous sampling signal before sending another.
authorAlex Rønne Petersen <alexrp@xamarin.com>
Tue, 20 Jun 2017 01:13:20 +0000 (03:13 +0200)
committerAlex Rønne Petersen <alexrp@xamarin.com>
Tue, 20 Jun 2017 03:39:14 +0000 (05:39 +0200)
This avoids situations where we could end up filling the signal queue
completely, leading to the undocumented EAGAIN return value from pthread_kill,
which breaks ~everything in the STW/GC code because it thinks an error return
value from pthread_kill means the thread has exited.

The actual reason why this happened is that when a thread is suspended, we mask
all signals except the restart signal for that thread. This means that a
program with a large number of threads or where GC collections could take
longer than usual would end up with tens of thousands of sampling signals
queued up, eventually reaching the limit of the user-level signal queue (see
`ulimit -i` and `RLIMIT_SIGPENDING`).

We only do this on systems where we use real time signals. Non-RT signals do
not have this problem as the kernel will throttle them.

mono/mini/mini-posix.c
mono/utils/mono-threads.c
mono/utils/mono-threads.h

index 5bcf5435df0e2a65a9a2f154b6582c80f8f3674f..eb0ddd11bf3b35947af70e0f7bfef37a90f68d47 100644 (file)
@@ -315,7 +315,9 @@ static gint32 profiler_interrupt_signals_received;
 MONO_SIG_HANDLER_FUNC (static, profiler_signal_handler)
 {
        int old_errno = errno;
-       int hp_save_index;
+
+       InterlockedWrite (&mono_thread_info_current ()->profiler_signal_ack, 1);
+
        MONO_SIG_HANDLER_GET_CONTEXT;
 
        /* See the comment in mono_runtime_shutdown_stat_profiler (). */
@@ -326,21 +328,24 @@ MONO_SIG_HANDLER_FUNC (static, profiler_signal_handler)
 
        InterlockedIncrement (&profiler_signals_received);
 
-       if (mono_thread_info_get_small_id () == -1)
-               return; //an non-attached thread got the signal
-
-       if (!mono_domain_get () || !mono_tls_get_jit_tls ())
-               return; //thread in the process of dettaching
+       // Did a non-attached or detaching thread get the signal?
+       if (mono_thread_info_get_small_id () == -1 ||
+           !mono_domain_get () ||
+           !mono_tls_get_jit_tls ()) {
+               errno = old_errno;
+               return;
+       }
 
        InterlockedIncrement (&profiler_signals_accepted);
 
-       hp_save_index = mono_hazard_pointer_save_for_signal_handler ();
+       int hp_save_index = mono_hazard_pointer_save_for_signal_handler ();
 
        mono_thread_info_set_is_async_context (TRUE);
        per_thread_profiler_hit (ctx);
        mono_thread_info_set_is_async_context (FALSE);
 
        mono_hazard_pointer_restore_for_signal_handler (hp_save_index);
+
        errno = old_errno;
 
        mono_chain_signal (MONO_SIG_HANDLER_PARAMS);
@@ -729,6 +734,14 @@ sampling_thread_func (void *data)
                        /* info should never be this thread as we're a tools thread. */
                        g_assert (mono_thread_info_get_tid (info) != mono_native_thread_id_get ());
 
+                       /*
+                        * Require an ack for the last sampling signal sent to the thread
+                        * so that we don't overflow the signal queue, leading to all sorts
+                        * of problems (e.g. GC STW failing).
+                        */
+                       if (profiler_signal != SIGPROF && !InterlockedCompareExchange (&info->profiler_signal_ack, 0, 1))
+                               continue;
+
                        mono_threads_pthread_kill (info, profiler_signal);
                        InterlockedIncrement (&profiler_signals_sent);
                } FOREACH_THREAD_SAFE_END
index 1cf4bb334c94b84e95602d8bdb7b8f5fffb93a6e..c7b4ae76384c6dc7ae7560ba8074a22f22aa09fa 100644 (file)
@@ -395,6 +395,8 @@ register_thread (MonoThreadInfo *info)
 
        info->internal_thread_gchandle = G_MAXUINT32;
 
+       info->profiler_signal_ack = 1;
+
        mono_threads_suspend_register (info);
 
        THREADS_DEBUG ("registering info %p tid %p small id %x\n", info, mono_thread_info_get_tid (info), info->small_id);
index fb5fb65f8995aada6f07cb928be9569c5ae11ccc..ff3b40ed525e236b1fcafdeea25aaac9f4015620 100644 (file)
@@ -222,6 +222,12 @@ typedef struct {
 
        /* GCHandle to MonoInternalThread */
        guint32 internal_thread_gchandle;
+
+       /*
+        * Used by the sampling code in mini-posix.c to ensure that a thread has
+        * handled a sampling signal before sending another one.
+        */
+       gint32 profiler_signal_ack;
 } MonoThreadInfo;
 
 typedef struct {