Handle a race condition between thread registration and sgen's stop the world. Fixes...
authorRodrigo Kumpera <kumpera@gmail.com>
Sat, 12 May 2012 12:58:13 +0000 (09:58 -0300)
committerRodrigo Kumpera <kumpera@gmail.com>
Fri, 18 May 2012 20:10:33 +0000 (17:10 -0300)
Since new threads can come and go while we're stopping the world, we must
ignore the new ones as they are not supposed to be usable before this STW
cycle.

Since thread registration takes the GC lock, once STW starts all new threads
can't register and thus touch managed memory. This means we can ignore all
of those not seen in the first suspend pass.

This bug and the patch was a contribution by Sam Lang.

Fixes #1917.

mono/metadata/sgen-gc.c
mono/metadata/sgen-gc.h
mono/metadata/sgen-os-mach.c
mono/metadata/sgen-os-posix.c
mono/metadata/sgen-os-win32.c

index 16f47f7e1131bd8e2aaf057d672de2b504187a6c..cd25cf56db0e08a1ac0f9d257bc2fdf9ed0723fe 100644 (file)
@@ -3673,7 +3673,7 @@ restart_threads_until_none_in_managed_allocator (void)
                   allocator */
                FOREACH_THREAD_SAFE (info) {
                        gboolean result;
-                       if (info->skip || info->gc_disabled)
+                       if (info->skip || info->gc_disabled || !info->joined_stw)
                                continue;
                        if (!info->thread_is_dying && (!info->stack_start || info->in_critical_region ||
                                        is_ip_in_managed_allocator (info->stopped_domain, info->stopped_ip))) {
@@ -3901,6 +3901,12 @@ scan_thread_data (void *start_nursery, void *end_nursery, gboolean precise, Gray
                        DEBUG (3, fprintf (gc_debug_file, "GC disabled for thread %p, range: %p-%p, size: %td\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
                        continue;
                }
+
+               if (!info->joined_stw) {
+                       DEBUG (3, fprintf (gc_debug_file, "Skipping thread not seen in STW %p, range: %p-%p, size: %td\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
+                       continue;
+               }
+               
                DEBUG (3, fprintf (gc_debug_file, "Scanning thread %p, range: %p-%p, size: %td, pinned=%d\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start, sgen_get_pinned_count ()));
                if (!info->thread_is_dying) {
                        if (gc_callbacks.thread_mark_func && !conservative_stack_mark) {
@@ -3989,6 +3995,7 @@ sgen_thread_register (SgenThreadInfo* info, void *addr)
        info->signal = 0;
 #endif
        info->skip = 0;
+       info->joined_stw = FALSE;
        info->doing_handshake = FALSE;
        info->thread_is_dying = FALSE;
        info->stack_start = NULL;
@@ -4776,7 +4783,6 @@ mono_gc_base_init (void)
                else {
                        fprintf (stderr, "Unknown minor collector `%s'.\n", minor_collector_opt);
                        exit (1);
-                       
                }
        }
 
index 9556fa8a5433c1024a10fd04466edf13874aeb8c..fc423166805919a35cfb1429fe01bd5e4c34c689 100644 (file)
@@ -113,6 +113,7 @@ struct _SgenThreadInfo {
 #endif
        int skip;
        volatile int in_critical_region;
+       gboolean joined_stw;
        gboolean doing_handshake;
        gboolean thread_is_dying;
        gboolean gc_disabled;
index f12e1697cc24778f46296032d00e7757c56d0fba..56e0ffef0feb692f2ff5c1389e05f1797c3e9f7c 100644 (file)
@@ -117,6 +117,8 @@ sgen_thread_handshake (BOOL suspend)
        int count = 0;
 
        FOREACH_THREAD_SAFE (info) {
+               info->joined_stw = suspend;
+
                if (info == cur_thread || sgen_is_worker_thread (mono_thread_info_get_tid (info)))
                        continue;
                if (info->gc_disabled)
index 63055f782caf18383a857d31256da20c1f801cab..cc75c3a3530b0ebb6c600e842cae1c49ca7ef203 100644 (file)
@@ -225,6 +225,7 @@ sgen_thread_handshake (BOOL suspend)
 
        count = 0;
        FOREACH_THREAD_SAFE (info) {
+               info->joined_stw = suspend;
                if (mono_native_thread_id_equals (mono_thread_info_get_tid (info), me)) {
                        continue;
                }
index 43e8eef01aab38d7e95def2d046be002f65aa6ac..a8a9d17b3f3a2046d3fb3a2ebe9d0206c956adb3 100644 (file)
@@ -91,6 +91,7 @@ sgen_thread_handshake (BOOL suspend)
        int count = 0;
 
        FOREACH_THREAD_SAFE (info) {
+               info->joined_stw = suspend;
                if (info == current)
                        continue;
                if (info->gc_disabled)
@@ -108,7 +109,6 @@ sgen_thread_handshake (BOOL suspend)
                        if (!sgen_resume_thread (info))
                                continue;
                }
-
                ++count;
        } END_FOREACH_THREAD_SAFE
        return count;