X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mono%2Futils%2Fmono-proclib.c;h=efd732d6d56b7592b9de709a4d34c352e42baf8f;hb=9afab4092501a7e7e240a2dd9ed0892d1e0821de;hp=63a56324ba755c45ff024664b02bfd354156f075;hpb=65b4aaf118b2982f2a2db38ad338ab6b6901b36f;p=mono.git diff --git a/mono/utils/mono-proclib.c b/mono/utils/mono-proclib.c index 63a56324ba7..efd732d6d56 100644 --- a/mono/utils/mono-proclib.c +++ b/mono/utils/mono-proclib.c @@ -1,6 +1,7 @@ /* * Copyright 2008-2011 Novell Inc * Copyright 2011 Xamarin Inc + * Licensed under the MIT license. See LICENSE file in the project root for full license information. */ #include "config.h" @@ -26,6 +27,7 @@ #if defined(_POSIX_VERSION) #include #include +#include #ifdef HAVE_SYS_TYPES_H #include #endif @@ -323,7 +325,7 @@ mono_process_get_times (gpointer pid, gint64 *start_time, gint64 *user_time, gin if (*start_time == 0) { static guint64 boot_time = 0; if (!boot_time) - boot_time = mono_100ns_datetime () - ((guint64)mono_msec_ticks ()) * 10000; + boot_time = mono_100ns_datetime () - mono_msec_boottime () * 10000; *start_time = boot_time + mono_process_get_data (pid, MONO_PROCESS_ELAPSED); } @@ -354,22 +356,35 @@ get_process_stat_item (int pid, int pos, int sum, MonoProcessError *error) mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT, th_count; thread_array_t th_array; size_t i; + kern_return_t ret; if (pid == getpid ()) { /* task_for_pid () doesn't work on ios, even for the current process */ task = mach_task_self (); } else { - if (task_for_pid (mach_task_self (), pid, &task) != KERN_SUCCESS) + do { + ret = task_for_pid (mach_task_self (), pid, &task); + } while (ret == KERN_ABORTED); + + if (ret != KERN_SUCCESS) RET_ERROR (MONO_PROCESS_ERROR_NOT_FOUND); } - if (task_info (task, TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count) != KERN_SUCCESS) { + do { + ret = task_info (task, TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count); + } while (ret == KERN_ABORTED); + + if (ret != KERN_SUCCESS) { if (pid != getpid ()) mach_port_deallocate (mach_task_self (), task); RET_ERROR (MONO_PROCESS_ERROR_OTHER); } + + do { + ret = task_threads (task, &th_array, &th_count); + } while (ret == KERN_ABORTED); - if (task_threads(task, &th_array, &th_count) != KERN_SUCCESS) { + if (ret != KERN_SUCCESS) { if (pid != getpid ()) mach_port_deallocate (mach_task_self (), task); RET_ERROR (MONO_PROCESS_ERROR_OTHER); @@ -380,7 +395,11 @@ get_process_stat_item (int pid, int pos, int sum, MonoProcessError *error) struct thread_basic_info th_info; mach_msg_type_number_t th_info_count = THREAD_BASIC_INFO_COUNT; - if (thread_info(th_array[i], THREAD_BASIC_INFO, (thread_info_t)&th_info, &th_info_count) == KERN_SUCCESS) { + do { + ret = thread_info(th_array[i], THREAD_BASIC_INFO, (thread_info_t)&th_info, &th_info_count); + } while (ret == KERN_ABORTED); + + if (ret == KERN_SUCCESS) { thread_user_time = th_info.user_time.seconds + th_info.user_time.microseconds / 1e6; thread_system_time = th_info.system_time.seconds + th_info.system_time.microseconds / 1e6; //thread_percent = (double)th_info.cpu_usage / TH_USAGE_SCALE; @@ -493,16 +512,25 @@ get_pid_status_item (int pid, const char *item, MonoProcessError *error, int mul task_t task; struct task_basic_info t_info; mach_msg_type_number_t th_count = TASK_BASIC_INFO_COUNT; + kern_return_t mach_ret; if (pid == getpid ()) { /* task_for_pid () doesn't work on ios, even for the current process */ task = mach_task_self (); } else { - if (task_for_pid (mach_task_self (), pid, &task) != KERN_SUCCESS) + do { + mach_ret = task_for_pid (mach_task_self (), pid, &task); + } while (mach_ret == KERN_ABORTED); + + if (mach_ret != KERN_SUCCESS) RET_ERROR (MONO_PROCESS_ERROR_NOT_FOUND); } - - if (task_info (task, TASK_BASIC_INFO, (task_info_t)&t_info, &th_count) != KERN_SUCCESS) { + + do { + mach_ret = task_info (task, TASK_BASIC_INFO, (task_info_t)&t_info, &th_count); + } while (mach_ret == KERN_ABORTED); + + if (mach_ret != KERN_SUCCESS) { if (pid != getpid ()) mach_port_deallocate (mach_task_self (), task); RET_ERROR (MONO_PROCESS_ERROR_OTHER); @@ -622,12 +650,12 @@ mono_cpu_count (void) GetSystemInfo (&info); return info.dwNumberOfProcessors; #else - int count = 0; #ifdef PLATFORM_ANDROID /* Android tries really hard to save power by powering off CPUs on SMP phones which * means the normal way to query cpu count returns a wrong value with userspace API. * Instead we use /sys entries to query the actual hardware CPU count. */ + int count = 0; char buffer[8] = {'\0'}; int present = open ("/sys/devices/system/cpu/present", O_RDONLY); /* Format of the /sys entry is a cpulist of indexes which in the case @@ -642,6 +670,75 @@ mono_cpu_count (void) if (count > 0) return count + 1; #endif + +#if defined(HOST_ARM) || defined (HOST_ARM64) + +/* + * Recap from Alexander Köplinger : + * + * When we merged the change from PR #2722, we started seeing random failures on ARM in + * the MonoTests.System.Threading.ThreadPoolTests.SetAndGetMaxThreads and + * MonoTests.System.Threading.ManualResetEventSlimTests.Constructor_Defaults tests. Both + * of those tests are dealing with Environment.ProcessorCount to verify some implementation + * details. + * + * It turns out that on the Jetson TK1 board we use on public Jenkins and on ARM kernels + * in general, the value returned by sched_getaffinity (or _SC_NPROCESSORS_ONLN) doesn't + * contain CPUs/cores that are powered off for power saving reasons. This is contrary to + * what happens on x86, where even cores in deep-sleep state are returned [1], [2]. This + * means that we would get a processor count of 1 at one point in time and a higher value + * when load increases later on as the system wakes CPUs. + * + * Various runtime pieces like the threadpool and also user code however relies on the + * value returned by Environment.ProcessorCount e.g. for deciding how many parallel tasks + * to start, thereby limiting the performance when that code thinks we only have one CPU. + * + * Talking to a few people, this was the reason why we changed to _SC_NPROCESSORS_CONF in + * mono#1688 and why we added a special case for Android in mono@de3addc to get the "real" + * number of processors in the system. + * + * Because of those issues Android/Dalvik also switched from _ONLN to _SC_NPROCESSORS_CONF + * for the Java API Runtime.availableProcessors() too [3], citing: + * > Traditionally this returned the number currently online, but many mobile devices are + * able to take unused cores offline to save power, so releases newer than Android 4.2 (Jelly + * Bean) return the maximum number of cores that could be made available if there were no + * power or heat constraints. + * + * The problem with sticking to _SC_NPROCESSORS_CONF however is that it breaks down in + * constrained environments like Docker or with an explicit CPU affinity set by the Linux + * `taskset` command, They'd get a higher CPU count than can be used, start more threads etc. + * which results in unnecessary context switches and overloaded systems. That's why we need + * to respect sched_getaffinity. + * + * So while in an ideal world we would be able to rely on sched_getaffinity/_SC_NPROCESSORS_ONLN + * to return the number of theoretically available CPUs regardless of power saving measures + * everywhere, we can't do this on ARM. + * + * I think the pragmatic solution is the following: + * * use sched_getaffinity (+ fallback to _SC_NPROCESSORS_ONLN in case of error) on x86. This + * ensures we're inline with what OpenJDK [4] and CoreCLR [5] do + * * use _SC_NPROCESSORS_CONF exclusively on ARM (I think we could eventually even get rid of + * the PLATFORM_ANDROID special case) + * + * Helpful links: + * + * [1] https://sourceware.org/ml/libc-alpha/2013-07/msg00383.html + * [2] https://lists.01.org/pipermail/powertop/2012-September/000433.html + * [3] https://android.googlesource.com/platform/libcore/+/750dc634e56c58d1d04f6a138734ac2b772900b5%5E1..750dc634e56c58d1d04f6a138734ac2b772900b5/ + * [4] https://bugs.openjdk.java.net/browse/JDK-6515172 + * [5] https://github.com/dotnet/coreclr/blob/7058273693db2555f127ce16e6b0c5b40fb04867/src/pal/src/misc/sysinfo.cpp#L148 + */ + +#ifdef _SC_NPROCESSORS_CONF + { + int count = sysconf (_SC_NPROCESSORS_CONF); + if (count > 0) + return count; + } +#endif + +#else + #ifdef HAVE_SCHED_GETAFFINITY { cpu_set_t set; @@ -649,13 +746,19 @@ mono_cpu_count (void) return CPU_COUNT (&set); } #endif -#ifdef _SC_NPROCESSORS_CONF - count = sysconf (_SC_NPROCESSORS_CONF); - if (count > 0) - return count; +#ifdef _SC_NPROCESSORS_ONLN + { + int count = sysconf (_SC_NPROCESSORS_ONLN); + if (count > 0) + return count; + } #endif + +#endif /* defined(HOST_ARM) || defined (HOST_ARM64) */ + #ifdef USE_SYSCTL { + int count; int mib [2]; size_t len = sizeof (int); mib [0] = CTL_HW; @@ -664,7 +767,7 @@ mono_cpu_count (void) return count; } #endif -#endif +#endif /* HOST_WIN32 */ /* FIXME: warn */ return 1; }