/*
 * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
 *
 * See LICENSE for licensing information.
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */
#include <config.h>
#include <signal.h>
#ifdef HAVE_SYS_SYSCALL_H
#include <sys/syscall.h>
#endif
#include <errno.h>
#include <sched.h>
#include <mono/metadata/assembly.h>
#include <mono/metadata/loader.h>
#include <mono/utils/dtrace.h>
#include <mono/utils/mono-signal-handler.h>
#include <mono/utils/mono-threads.h>
#include <mono/utils/mono-threads-posix-signals.h>
#include "mini.h"
#include <string.h>
#include "jit-icalls.h"
#ifdef PLATFORM_MACOSX
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <mach/clock.h>
#endif

#if defined(__native_client__) || defined(HOST_WATCHOS)

void
mono_runtime_posix_install_handlers (void)
{
	/* we still need to ignore SIGPIPE */
	signal (SIGPIPE, SIG_IGN);
}

/* ... (further stub handlers and the FULL_STAT_PROFILER_BACKTRACE feature check elided) ... */
#define FULL_STAT_PROFILER_BACKTRACE 0
#endif

/* Sampling is possible whenever we have either a POSIX real-time signal or SIGPROF to work with. */
#if (defined (USE_POSIX_BACKEND) && defined (SIGRTMIN)) || defined (SIGPROF)
#define HAVE_PROFILER_SIGNAL
#endif

#ifdef HAVE_PROFILER_SIGNAL

static void
per_thread_profiler_hit (void *ctx)
{
	/* ... (per-thread sample capture elided) ... */
}
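
/*
 * How sampling works: a dedicated sampler thread (sampling_thread_func ()
 * below) periodically sends profiler_signal to every live managed thread,
 * and each signalled thread then records a sample for itself from the
 * signal handler. The counters below track how well that delivery works.
 */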
static MonoNativeThreadId sampling_thread;

static gint32 profiler_signals_sent;
static gint32 profiler_signals_received;
static gint32 profiler_signals_accepted;
static gint32 profiler_interrupt_signals_received;

MONO_SIG_HANDLER_FUNC (static, profiler_signal_handler)
{
int old_errno = errno;
int hp_save_index;
MONO_SIG_HANDLER_GET_CONTEXT;

	/* See the comment in mono_runtime_shutdown_stat_profiler (). */
	if (mono_native_thread_id_get () == sampling_thread) {
#ifdef HAVE_CLOCK_NANOSLEEP
		if (mono_profiler_get_sampling_mode () == MONO_PROFILER_STAT_MODE_PROCESS) {
			InterlockedIncrement (&profiler_interrupt_signals_received);
			return;
		}
#endif

		g_error ("%s: Unexpected profiler signal received by the sampler thread", __func__);
	}

	InterlockedIncrement (&profiler_signals_received);

	if (mono_thread_info_get_small_id () == -1)
		return; // A non-attached thread got the signal.
	if (!mono_domain_get () || !mono_native_tls_get_value (mono_jit_tls_id))
		return; // The thread is in the process of detaching.

	InterlockedIncrement (&profiler_signals_accepted);

	hp_save_index = mono_hazard_pointer_save_for_signal_handler ();

	mono_thread_info_set_is_async_context (TRUE);
	per_thread_profiler_hit (ctx);
	mono_thread_info_set_is_async_context (FALSE);

	mono_hazard_pointer_restore_for_signal_handler (hp_save_index);

	errno = old_errno;

	mono_chain_signal (MONO_SIG_HANDLER_PARAMS);
}
#endif

MONO_SIG_HANDLER_FUNC (static, sigquit_signal_handler)
{
	/* ... (thread dump handling elided) ... */
}

/* ... */

void
mono_runtime_cleanup_handlers (void)
{
	/* ... (signal handler removal elided) ... */
	free_saved_signal_handlers ();
}

#ifdef HAVE_PROFILER_SIGNAL

static volatile gint32 sampling_thread_running;
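
/*
 * The sampler thread needs to read a clock and sleep until an absolute point
 * in time. Two backends provide this: Mach clock services on Mac, and the
 * POSIX clocks (with a nanosleep ()-based fallback) everywhere else. Both
 * implement clock_init (), clock_cleanup (), clock_get_time_ns () and
 * clock_sleep_ns_abs ().
 */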

#ifdef PLATFORM_MACOSX

static clock_serv_t sampling_clock_service;
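
/*
 * Mach calls can fail with KERN_ABORTED when interrupted, so each wrapper
 * below simply retries until it gets a definitive result.
 */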

static void
clock_init (void)
{
	kern_return_t ret;

	do {
		ret = host_get_clock_service (mach_host_self (), SYSTEM_CLOCK, &sampling_clock_service);
	} while (ret == KERN_ABORTED);

	if (ret != KERN_SUCCESS)
		g_error ("%s: host_get_clock_service () returned %d", __func__, ret);
}

static void
clock_cleanup (void)
{
	kern_return_t ret;

	do {
		ret = mach_port_deallocate (mach_task_self (), sampling_clock_service);
	} while (ret == KERN_ABORTED);

	if (ret != KERN_SUCCESS)
		g_error ("%s: mach_port_deallocate () returned %d", __func__, ret);
}

static guint64
clock_get_time_ns (void)
{
	kern_return_t ret;
	mach_timespec_t mach_ts;

	do {
		ret = clock_get_time (sampling_clock_service, &mach_ts);
	} while (ret == KERN_ABORTED);

	if (ret != KERN_SUCCESS)
		g_error ("%s: clock_get_time () returned %d", __func__, ret);

	return ((guint64) mach_ts.tv_sec * 1000000000) + (guint64) mach_ts.tv_nsec;
}

static void
clock_sleep_ns_abs (guint64 ns_abs)
{
	kern_return_t ret;
	mach_timespec_t then, remain_unused;

	then.tv_sec = ns_abs / 1000000000;
	then.tv_nsec = ns_abs % 1000000000;

	do {
		ret = clock_sleep (sampling_clock_service, TIME_ABSOLUTE, then, &remain_unused);

		if (ret != KERN_SUCCESS && ret != KERN_ABORTED)
			g_error ("%s: clock_sleep () returned %d", __func__, ret);
	} while (ret == KERN_ABORTED && InterlockedRead (&sampling_thread_running));
}

#else

static clockid_t sampling_posix_clock;

static void
clock_init (void)
{
	switch (mono_profiler_get_sampling_mode ()) {
	case MONO_PROFILER_STAT_MODE_PROCESS:
#ifdef HAVE_CLOCK_NANOSLEEP
		/*
		 * If we don't have clock_nanosleep (), measuring the process time
		 * makes very little sense as we can only use nanosleep () to sleep on
		 * real time.
		 */
		sampling_posix_clock = CLOCK_PROCESS_CPUTIME_ID;
		break;
#endif
		/* Fall through: without clock_nanosleep (), sample on real time instead. */
	case MONO_PROFILER_STAT_MODE_REAL: sampling_posix_clock = CLOCK_MONOTONIC; break;
	default: g_assert_not_reached (); break;
	}
}

static void
clock_cleanup (void)
{
}

static guint64
clock_get_time_ns (void)
{
	struct timespec ts;

	if (clock_gettime (sampling_posix_clock, &ts) == -1)
		g_error ("%s: clock_gettime () returned -1, errno = %d", __func__, errno);

	return ((guint64) ts.tv_sec * 1000000000) + (guint64) ts.tv_nsec;
}

static void
clock_sleep_ns_abs (guint64 ns_abs)
{
#ifdef HAVE_CLOCK_NANOSLEEP
	int ret;
	struct timespec then;

	then.tv_sec = ns_abs / 1000000000;
	then.tv_nsec = ns_abs % 1000000000;

	do {
		ret = clock_nanosleep (sampling_posix_clock, TIMER_ABSTIME, &then, NULL);

		if (ret != 0 && ret != EINTR)
			g_error ("%s: clock_nanosleep () returned %d", __func__, ret);
	} while (ret == EINTR && InterlockedRead (&sampling_thread_running));
#else
	int ret;
	gint64 diff;
	struct timespec req;

	/*
	 * What follows is a crude attempt at emulating clock_nanosleep () on OSs
	 * which don't provide it (e.g. FreeBSD).
	 *
	 * The problem with nanosleep () is that if it is interrupted by a signal,
	 * time will drift as a result of having to restart the call after the
	 * signal handler has finished. For this reason, we avoid using the rem
	 * argument of nanosleep (). Instead, before every nanosleep () call, we
	 * check if enough time has passed to satisfy the sleep request. If yes, we
	 * simply return. If not, we calculate the difference and do another sleep.
	 *
	 * This should reduce the amount of drift that happens because we account
	 * for the time spent executing the signal handler, which nanosleep () is
	 * not guaranteed to do for the rem argument.
	 *
	 * The downside to this approach is that it is slightly expensive: We have
	 * to make an extra system call to retrieve the current time whenever we're
	 * going to restart a nanosleep () call. This is unlikely to be a problem
	 * in practice since the sampling thread won't be receiving many signals in
	 * the first place (it's a tools thread, so no STW), and because typical
	 * sleep periods for the thread are many orders of magnitude bigger than
	 * the time it takes to actually perform that system call (just a few
	 * nanoseconds).
	 */
	do {
		diff = (gint64) ns_abs - (gint64) clock_get_time_ns ();

		if (diff <= 0)
			break;

		req.tv_sec = diff / 1000000000;
		req.tv_nsec = diff % 1000000000;

		if ((ret = nanosleep (&req, NULL)) == -1 && errno != EINTR)
			g_error ("%s: nanosleep () returned -1, errno = %d", __func__, errno);
	} while (ret == -1 && InterlockedRead (&sampling_thread_running));
#endif
}

#endif

static int profiler_signal;
static volatile gint32 sampling_thread_exiting;

static mono_native_thread_return_t
sampling_thread_func (void *data)
{
	mono_threads_attach_tools_thread ();
	mono_native_thread_set_name (mono_native_thread_id_get (), "Profiler sampler");

	gint64 rate = 1000000000 / mono_profiler_get_sampling_rate ();

	int old_policy;
	struct sched_param old_sched;
	pthread_getschedparam (pthread_self (), &old_policy, &old_sched);

	/*
	 * Attempt to switch the thread to real time scheduling. This will not
	 * necessarily work on all OSs; for example, most Linux systems will give
	 * us EPERM here unless configured to allow this.
	 *
	 * TODO: This does not work on Mac (and maybe some other OSs). On Mac, we
	 * have to use the Mach thread policy routines to switch to real-time
	 * scheduling. This is quite tricky as we need to specify how often we'll
	 * be doing work (easy), the normal processing time needed (also easy),
	 * and the maximum amount of processing time needed (hard). This is
	 * further complicated by the fact that if we misbehave and take too long
	 * to do our work, the kernel may knock us back down to the normal thread
	 * scheduling policy without telling us. A rough sketch of the Mach
	 * approach follows this function.
	 */
	struct sched_param sched = { .sched_priority = sched_get_priority_max (SCHED_FIFO) };
	pthread_setschedparam (pthread_self (), SCHED_FIFO, &sched);

	clock_init ();

	guint64 sleep = clock_get_time_ns ();

	while (InterlockedRead (&sampling_thread_running)) {
		sleep += rate;

		FOREACH_THREAD_SAFE (info) {
			/* info should never be this thread as we're a tools thread. */
			g_assert (mono_thread_info_get_tid (info) != mono_native_thread_id_get ());

			mono_threads_pthread_kill (info, profiler_signal);
			InterlockedIncrement (&profiler_signals_sent);
		} FOREACH_THREAD_SAFE_END

		clock_sleep_ns_abs (sleep);
	}

	InterlockedWrite (&sampling_thread_exiting, 1);

	clock_cleanup ();

	pthread_setschedparam (pthread_self (), old_policy, &old_sched);

	mono_thread_info_detach ();

	return NULL;
}
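
/*
 * A rough, unused sketch of the Mach approach mentioned in the TODO above,
 * based on the documented THREAD_TIME_CONSTRAINT_POLICY. The helper name and
 * the computation/constraint fractions are made-up placeholders; real values
 * would need careful tuning, and this assumes the thread policy declarations
 * are available through <mach/mach.h>.
 */
#ifdef PLATFORM_MACOSX
static G_GNUC_UNUSED kern_return_t
sampling_thread_try_mach_real_time (guint64 period_ns)
{
	mach_timebase_info_data_t tb;
	mach_timebase_info (&tb);

	/* Convert nanoseconds to Mach absolute time units. */
	guint64 period = period_ns * tb.denom / tb.numer;

	thread_time_constraint_policy_data_t policy;
	policy.period = (guint32) period;
	policy.computation = (guint32) (period / 10); /* Placeholder: 10% of the period. */
	policy.constraint = (guint32) (period / 5); /* Placeholder: 20% of the period. */
	policy.preemptible = FALSE;

	/* The kernel may silently demote us again if we overrun the constraint. */
	return thread_policy_set (pthread_mach_thread_np (pthread_self ()),
		THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t) &policy,
		THREAD_TIME_CONSTRAINT_POLICY_COUNT);
}
#endif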

void
mono_runtime_shutdown_stat_profiler (void)
{
	InterlockedWrite (&sampling_thread_running, 0);

#ifdef HAVE_CLOCK_NANOSLEEP
	/*
	 * There is a slight problem when we're using CLOCK_PROCESS_CPUTIME_ID: If
	 * we're shutting down and there's largely no activity in the process other
	 * than waiting for the sampler thread to shut down, it can take upwards of
	 * 20 seconds (depending on a lot of factors) for us to shut down because
	 * the sleep progresses very slowly as a result of the low CPU activity.
	 *
	 * We fix this by repeatedly sending the profiler signal to the sampler
	 * thread in order to interrupt the sleep. clock_sleep_ns_abs () will check
	 * sampling_thread_running upon an interrupt and return immediately if it's
	 * zero. profiler_signal_handler () has a special case to ignore the signal
	 * for the sampler thread.
	 *
	 * We do not need to do this on platforms where we use a regular sleep
	 * based on a monotonic clock. The sleep will return in a reasonable amount
	 * of time in those cases.
	 */
	if (mono_profiler_get_sampling_mode () == MONO_PROFILER_STAT_MODE_PROCESS) {
		MonoThreadInfo *info;

		// Did it shut down already?
		if ((info = mono_thread_info_lookup (sampling_thread))) {
			while (!InterlockedRead (&sampling_thread_exiting)) {
				mono_threads_pthread_kill (info, profiler_signal);
				mono_thread_info_usleep (10 * 1000 /* 10ms */);
			}

			// Make sure info can be freed.
			mono_hazard_pointer_clear (mono_hazard_pointer_get (), 1);
}
}
#endif

	pthread_join (sampling_thread, NULL);

	/*
	 * We can't safely remove the signal handler because we have no guarantee
	 * that all pending signals have been delivered at this point. This should
	 * not really be a problem anyway.
	 */
	//remove_signal_handler (profiler_signal);
}

void
mono_runtime_setup_stat_profiler (void)
{
	/*
	 * Use a real-time signal when possible. This gives us roughly a 99% signal
	 * delivery rate in all cases. On the other hand, using a regular signal
	 * tends to result in awful delivery rates when the application is heavily
	 * loaded.
	 *
	 * We avoid real-time signals on Android as they're super broken in certain
	 * API levels (too small sigset_t, nonsensical SIGRTMIN/SIGRTMAX values,
	 * etc).
	 *
	 * TODO: On Mac, we should explore using the Mach thread suspend/resume
	 * functions and doing the stack walk from the sampling thread. This would
	 * get us a 100% sampling rate. However, this may interfere with the GC's
	 * STW logic. Could perhaps be solved by taking the suspend lock. A rough
	 * sketch of that approach follows this function.
	 */
#if defined (USE_POSIX_BACKEND) && defined (SIGRTMIN) && !defined (PLATFORM_ANDROID)
	/* Just take the first real-time signal we can get. */
	profiler_signal = mono_threads_posix_signal_search_alternative (-1);
#else
	profiler_signal = SIGPROF;
#endif

	add_signal_handler (profiler_signal, profiler_signal_handler, SA_RESTART);

	mono_counters_register ("Sampling signals sent", MONO_COUNTER_UINT | MONO_COUNTER_PROFILER | MONO_COUNTER_MONOTONIC, &profiler_signals_sent);
	mono_counters_register ("Sampling signals received", MONO_COUNTER_UINT | MONO_COUNTER_PROFILER | MONO_COUNTER_MONOTONIC, &profiler_signals_received);
	mono_counters_register ("Sampling signals accepted", MONO_COUNTER_UINT | MONO_COUNTER_PROFILER | MONO_COUNTER_MONOTONIC, &profiler_signals_accepted);
	mono_counters_register ("Shutdown signals received", MONO_COUNTER_UINT | MONO_COUNTER_PROFILER | MONO_COUNTER_MONOTONIC, &profiler_interrupt_signals_received);

	InterlockedWrite (&sampling_thread_running, 1);
	mono_native_thread_create (&sampling_thread, sampling_thread_func, NULL);
}
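
/*
 * A minimal, unused sketch of the suspend-and-sample idea from the TODO in
 * mono_runtime_setup_stat_profiler () above. The helper is hypothetical and
 * leaves out the actual stack walk; a real version would also need to
 * cooperate with the GC's STW machinery (e.g. by taking the suspend lock).
 */
#ifdef PLATFORM_MACOSX
static G_GNUC_UNUSED kern_return_t
sample_thread_via_mach_suspend (thread_act_t thread)
{
	kern_return_t ret = thread_suspend (thread);

	if (ret != KERN_SUCCESS)
		return ret;

	/*
	 * While the thread is frozen, its register state could be fetched with
	 * thread_get_state () and its stack walked from the sampler thread.
	 */

	return thread_resume (thread);
}
#endif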

#else

void
mono_runtime_shutdown_stat_profiler (void)
{
}

void
mono_runtime_setup_stat_profiler (void)
{
}

#endif

#if !defined(PLATFORM_MACOSX)
pid_t
mono_runtime_syscall_fork ()