2 * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
3 * Copyright (c) 1996 by Silicon Graphics. All rights reserved.
4 * Copyright (c) 1998 by Fergus Henderson. All rights reserved.
5 * Copyright (c) 2000-2005 by Hewlett-Packard Company. All rights reserved.
7 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
8 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
10 * Permission is hereby granted to use or copy this program
11 * for any purpose, provided the above notices are retained on all copies.
12 * Permission to modify the code and to distribute modified code is granted,
13 * provided the above notices are retained, and a notice that the code was
14 * modified is included with the above copyright notice.
17 * Support code originally for LinuxThreads, the clone()-based kernel
18 * thread package for Linux which is included in libc6.
20 * This code no doubt makes some assumptions beyond what is
21 * guaranteed by the pthread standard, though it now does
22 * very little of that. It now also supports NPTL, and many
23 * other Posix thread implementations. We are trying to merge
24 * all flavors of pthread support code into this file.
26 /* DG/UX ix86 support <takis@xfree86.org> */
28 * Linux_threads.c now also includes some code to support HPUX and
29 * OSF1 (Compaq Tru64 Unix, really). The OSF1 support is based on Eric Benson's
32 * Eric also suggested an alternate basis for a lock implementation in
34 * + #elif defined(OSF1)
35 * + unsigned long GC_allocate_lock = 0;
36 * + msemaphore GC_allocate_semaphore;
37 * + # define GC_TRY_LOCK() \
38 * + ((msem_lock(&GC_allocate_semaphore, MSEM_IF_NOWAIT) == 0) \
39 * + ? (GC_allocate_lock = 1) \
41 * + # define GC_LOCK_TAKEN GC_allocate_lock
46 /*#define DEBUG_THREADS 1*/
48 # include "private/pthread_support.h"
50 # if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS)
52 # if defined(GC_DGUX386_THREADS) && !defined(_POSIX4A_DRAFT10_SOURCE)
53 # define _POSIX4A_DRAFT10_SOURCE 1
56 # if defined(GC_DGUX386_THREADS) && !defined(_USING_POSIX4A_DRAFT10)
57 # define _USING_POSIX4A_DRAFT10 1
66 # include <sys/mman.h>
67 # include <sys/time.h>
68 # include <sys/types.h>
69 # include <sys/stat.h>
73 # include "gc_inline.h"
75 #if defined(GC_DARWIN_THREADS)
76 # include "private/darwin_semaphore.h"
78 # include <semaphore.h>
79 #endif /* !GC_DARWIN_THREADS */
81 #if defined(GC_DARWIN_THREADS) || defined(GC_FREEBSD_THREADS)
82 # include <sys/sysctl.h>
83 #endif /* GC_DARWIN_THREADS */
85 #if defined(GC_NETBSD_THREADS)
86 # include <sys/param.h>
87 # include <sys/sysctl.h>
88 #endif /* GC_NETBSD_THREADS */
90 /* Allocator lock definitions. */
91 #if !defined(USE_SPIN_LOCK)
92 pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
94 unsigned long GC_lock_holder = NO_THREAD;
95 /* Used only for assertions, and to prevent */
96 /* recursive reentry in the system call wrapper. */
98 #if defined(GC_DGUX386_THREADS)
99 # include <sys/dg_sys_info.h>
100 # include <sys/_int_psem.h>
101 /* sem_t is an uint in DG/UX */
102 typedef unsigned int sem_t;
103 #endif /* GC_DGUX386_THREADS */
109 /* Undefine macros used to redirect pthread primitives. */
110 # undef pthread_create
111 # if !defined(GC_DARWIN_THREADS)
112 # undef pthread_sigmask
115 # undef pthread_detach
116 # if defined(GC_OSF1_THREADS) && defined(_PTHREAD_USE_MANGLED_NAMES_) \
117 && !defined(_PTHREAD_USE_PTDNAM_)
118 /* Restore the original mangled names on Tru64 UNIX. */
119 # define pthread_create __pthread_create
120 # define pthread_join __pthread_join
121 # define pthread_detach __pthread_detach
124 #ifdef GC_USE_LD_WRAP
125 # define WRAP_FUNC(f) __wrap_##f
126 # define REAL_FUNC(f) __real_##f
128 # ifdef GC_USE_DLOPEN_WRAP
130 # define WRAP_FUNC(f) f
131 # define REAL_FUNC(f) GC_real_##f
132 /* We define both GC_f and plain f to be the wrapped function. */
133 /* In that way plain calls work, as do calls from files that */
134 /* included gc.h, which redefined f to GC_f. */
135 /* FIXME: Needs work for DARWIN and Tru64 (OSF1) */
136 typedef int (* GC_pthread_create_t)(pthread_t *, const pthread_attr_t *,
137 void * (*)(void *), void *);
138 static GC_pthread_create_t GC_real_pthread_create;
139 typedef int (* GC_pthread_sigmask_t)(int, const sigset_t *, sigset_t *);
140 static GC_pthread_sigmask_t GC_real_pthread_sigmask;
141 typedef int (* GC_pthread_join_t)(pthread_t, void **);
142 static GC_pthread_join_t GC_real_pthread_join;
143 typedef int (* GC_pthread_detach_t)(pthread_t);
144 static GC_pthread_detach_t GC_real_pthread_detach;
146 # define WRAP_FUNC(f) GC_##f
147 # if !defined(GC_DGUX386_THREADS)
148 # define REAL_FUNC(f) f
149 # else /* GC_DGUX386_THREADS */
150 # define REAL_FUNC(f) __d10_##f
151 # endif /* GC_DGUX386_THREADS */
155 #if defined(GC_USE_DL_WRAP) || defined(GC_USE_DLOPEN_WRAP)
156 /* Define GC_ functions as aliases for the plain ones, which will */
157 /* be intercepted. This allows files which include gc.h, and hence */
158 /* generate references to the GC_ symbols, to see the right symbols. */
/* GC_-prefixed entry points for pthread_create, pthread_sigmask, */
/* pthread_join and pthread_detach. Each one passes its arguments */
/* unchanged to the plain pthread_* name and returns that call's result. */
159 int GC_pthread_create(pthread_t * t, const pthread_attr_t * a,
160 void * (* fn)(void *), void * arg) {
161 return pthread_create(t, a, fn, arg);
163 int GC_pthread_sigmask(int how, const sigset_t *mask, sigset_t *old) {
164 return pthread_sigmask(how, mask, old);
166 int GC_pthread_join(pthread_t t, void **res) {
167 return pthread_join(t, res);
169 int GC_pthread_detach(pthread_t t) {
170 return pthread_detach(t);
172 #endif /* Linker-based interception. */
174 #ifdef GC_USE_DLOPEN_WRAP
175 static GC_bool GC_syms_initialized = FALSE;
/* Look up pthread_create, pthread_sigmask, pthread_join and */
/* pthread_detach with dlsym() and store them in the GC_real_* function */
/* pointers. Returns immediately once GC_syms_initialized is set. */
177 void GC_init_real_syms(void)
180 # define LIBPTHREAD_NAME "libpthread.so.0"
181 # define LIBPTHREAD_NAME_LEN 16 /* incl. trailing 0 */
182 size_t len = LIBPTHREAD_NAME_LEN - 1;
183 char namebuf[LIBPTHREAD_NAME_LEN];
184 static char *libpthread_name = LIBPTHREAD_NAME;
186 if (GC_syms_initialized) return;
/* NOTE(review): two assignments to dl_handle follow; presumably a */
/* preprocessor conditional that is not visible here selects between */
/* RTLD_NEXT and an explicit dlopen() - confirm. */
188 dl_handle = RTLD_NEXT;
190 dl_handle = dlopen(libpthread_name, RTLD_LAZY);
191 if (NULL == dl_handle) {
/* Retry with the version suffix removed: drop trailing digits, then */
/* one trailing '.', and copy the shortened name into namebuf. */
/* NOTE(review): isdigit() receives a plain char here; the C library */
/* expects a value representable as unsigned char (or EOF). */
192 while (isdigit(libpthread_name[len-1])) --len;
193 if (libpthread_name[len-1] == '.') --len;
194 memcpy(namebuf, libpthread_name, len);
196 dl_handle = dlopen(namebuf, RTLD_LAZY);
198 if (NULL == dl_handle) ABORT("Couldn't open libpthread\n");
/* NOTE(review): none of the dlsym() results below is compared */
/* against NULL in the visible code. */
200 GC_real_pthread_create = (GC_pthread_create_t)
201 dlsym(dl_handle, "pthread_create");
202 GC_real_pthread_sigmask = (GC_pthread_sigmask_t)
203 dlsym(dl_handle, "pthread_sigmask");
204 GC_real_pthread_join = (GC_pthread_join_t)
205 dlsym(dl_handle, "pthread_join");
206 GC_real_pthread_detach = (GC_pthread_detach_t)
207 dlsym(dl_handle, "pthread_detach");
208 GC_syms_initialized = TRUE;
211 # define INIT_REAL_SYMS() if (!GC_syms_initialized) GC_init_real_syms();
213 # define INIT_REAL_SYMS()
216 void GC_thr_init(void);
218 static GC_bool parallel_initialized = FALSE;
220 GC_bool GC_need_to_lock = FALSE;
222 void GC_init_parallel(void);
224 long GC_nprocs = 1; /* Number of processors. We may not have */
225 /* access to all of them, but this is as good */
226 /* a guess as any ... */
228 #ifdef THREAD_LOCAL_ALLOC
229 /* We must explicitly mark ptrfree and gcj free lists, since the free */
230 /* list links wouldn't otherwise be found. We also set them in the */
231 /* normal free lists, since that involves touching less memory than if */
232 /* we scanned them normally. */
/* Hand the tlfs member of every entry in the GC_threads table to */
/* GC_mark_thread_local_fls_for(). */
233 void GC_mark_thread_local_free_lists(void)
/* Outer loop: hash buckets. Inner loop: the chain in each bucket. */
238 for (i = 0; i < THREAD_TABLE_SZ; ++i) {
239 for (p = GC_threads[i]; 0 != p; p = p -> next) {
240 GC_mark_thread_local_fls_for(&(p->tlfs));
245 #if defined(GC_ASSERTIONS)
246 /* Check that all thread-local free-lists are completely marked. */
247 /* Also check that thread-specific-data structures are marked. */
248 void GC_check_tls(void) {
/* Run GC_check_tls_for() on the tlfs of every thread table entry. */
252 for (i = 0; i < THREAD_TABLE_SZ; ++i) {
253 for (p = GC_threads[i]; 0 != p; p = p -> next) {
254 GC_check_tls_for(&(p->tlfs));
257 # if defined(USE_CUSTOM_SPECIFIC)
/* The key's marks are checked only when GC_thread_key is nonzero. */
258 if (GC_thread_key != 0)
259 GC_check_tsd_marks(GC_thread_key);
262 #endif /* GC_ASSERTIONS */
264 #endif /* Thread_local_alloc */
269 # define MAX_MARKERS 16
272 static ptr_t marker_sp[MAX_MARKERS] = {0};
274 static ptr_t marker_bsp[MAX_MARKERS] = {0};
/* Start routine of a marker helper thread. id is cast to word and */
/* used as this helper's index into marker_sp / marker_bsp. The */
/* for loop has no termination condition; each iteration calls */
/* GC_help_marker() with the current value of my_mark_no. */
277 void * GC_mark_thread(void * id)
/* Record an approximate stack pointer for this helper. */
/* start_mark_threads() passes indices below GC_markers - 1, and */
/* clamps GC_markers to MAX_MARKERS; no bound check is done here. */
281 marker_sp[(word)id] = GC_approx_sp();
283 marker_bsp[(word)id] = GC_save_regs_in_stack();
285 for (;; ++my_mark_no) {
286 /* GC_mark_no is passed only to allow GC_help_marker to terminate */
287 /* promptly. This is important if it were called from the signal */
288 /* handler or from the GC lock acquisition code. Under Linux, it's */
289 /* not safe to call it from a signal handler, since it uses mutexes */
290 /* and condition variables. Since it is called only here, the */
291 /* argument is unnecessary. */
292 if (my_mark_no < GC_mark_no || my_mark_no > GC_mark_no + 2) {
293 /* resynchronize if we get far off, e.g. because GC_mark_no */
295 my_mark_no = GC_mark_no;
297 # ifdef DEBUG_THREADS
298 GC_printf("Starting mark helper for mark number %lu\n", my_mark_no);
300 GC_help_marker(my_mark_no);
304 extern long GC_markers; /* Number of mark threads we would */
305 /* like to have. Includes the */
306 /* initiating thread. */
308 pthread_t GC_mark_threads[MAX_MARKERS];
310 #define PTHREAD_CREATE REAL_FUNC(pthread_create)
/* Create the marker helper threads: GC_markers - 1 detached threads */
/* running GC_mark_thread, each given its index as the argument. */
312 static void start_mark_threads(void)
/* Clamp the requested count to the size of the marker arrays. */
317 if (GC_markers > MAX_MARKERS) {
318 WARN("Limiting number of mark threads\n", 0);
319 GC_markers = MAX_MARKERS;
321 if (0 != pthread_attr_init(&attr)) ABORT("pthread_attr_init failed");
323 if (0 != pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED))
324 ABORT("pthread_attr_setdetachstate failed");
326 # if defined(HPUX) || defined(GC_DGUX386_THREADS)
327 /* Default stack size is usually too small: fix it. */
328 /* Otherwise marker threads or GC may run out of */
330 # define MIN_STACK_SIZE (8*HBLKSIZE*sizeof(word))
/* Raise the stack size to MIN_STACK_SIZE only if the attribute's */
/* current value is smaller. */
335 if (pthread_attr_getstacksize(&attr, &old_size) != 0)
336 ABORT("pthread_attr_getstacksize failed\n");
337 if (old_size < MIN_STACK_SIZE) {
338 if (pthread_attr_setstacksize(&attr, MIN_STACK_SIZE) != 0)
339 ABORT("pthread_attr_setstacksize failed\n");
342 # endif /* HPUX || GC_DGUX386_THREADS */
343 if (GC_print_stats) {
344 GC_log_printf("Starting %ld marker threads\n", GC_markers - 1);
/* NOTE(review): POSIX pthread_create reports failure through its */
/* return value, not errno, so the errno printed below may be */
/* unrelated to the failure. Consider capturing the return value. */
346 for (i = 0; i < GC_markers - 1; ++i) {
347 if (0 != PTHREAD_CREATE(GC_mark_threads + i, &attr,
348 GC_mark_thread, (void *)(word)i)) {
349 WARN("Marker thread creation failed, errno = %ld.\n", errno);
354 #endif /* PARALLEL_MARK */
356 GC_bool GC_thr_initialized = FALSE;
358 volatile GC_thread GC_threads[THREAD_TABLE_SZ];
360 void GC_push_thread_structures(void)
362 GC_ASSERT(I_HOLD_LOCK());
363 GC_push_all((ptr_t)(GC_threads), (ptr_t)(GC_threads)+sizeof(GC_threads));
364 # if defined(THREAD_LOCAL_ALLOC)
365 GC_push_all((ptr_t)(&GC_thread_key),
366 (ptr_t)(&GC_thread_key)+sizeof(&GC_thread_key));
370 /* It may not be safe to allocate when we register the first thread. */
371 static struct GC_Thread_Rep first_thread;
373 /* Add a thread to GC_threads. We assume it wasn't already there. */
374 /* Caller holds allocation lock. */
/* Create a table entry for thread id and link it at the head of its */
/* hash bucket. The first call hands out the static first_thread */
/* record; later calls allocate with GC_INTERNAL_MALLOC. Returns 0 if */
/* no record could be obtained. */
375 GC_thread GC_new_thread(pthread_t id)
377 int hv = NUMERIC_THREAD_ID(id) % THREAD_TABLE_SZ;
379 static GC_bool first_thread_used = FALSE;
381 GC_ASSERT(I_HOLD_LOCK());
382 if (!first_thread_used) {
383 result = &first_thread;
384 first_thread_used = TRUE;
386 result = (struct GC_Thread_Rep *)
387 GC_INTERNAL_MALLOC(sizeof(struct GC_Thread_Rep), NORMAL);
/* NOTE(review): this assertion dereferences result before the NULL */
/* check on the following line; with assertions enabled, a failed */
/* allocation would fault here instead of returning 0. */
388 GC_ASSERT(result -> flags == 0);
390 if (result == 0) return(0);
/* Insert at the head of the bucket chain. */
392 result -> next = GC_threads[hv];
393 GC_threads[hv] = result;
394 GC_ASSERT(result -> flags == 0 && result -> thread_blocked == 0);
398 /* Delete a thread from GC_threads. We assume it is there. */
399 /* (The code intentionally traps if it wasn't.) */
/* Unlink the table entry whose id equals id from its hash bucket. */
/* Caller must hold the allocation lock. */
400 void GC_delete_thread(pthread_t id)
402 int hv = NUMERIC_THREAD_ID(id) % THREAD_TABLE_SZ;
403 register GC_thread p = GC_threads[hv];
404 register GC_thread prev = 0;
406 GC_ASSERT(I_HOLD_LOCK());
/* Search the chain for the matching id. There is no p != 0 test, so */
/* a missing entry leads to a null dereference here. */
407 while (!THREAD_EQUAL(p -> id, id)) {
/* Unlink p from the bucket head or from its predecessor. */
/* NOTE(review): the condition choosing between the two assignments */
/* is not visible here - presumably it tests prev == 0; confirm. */
412 GC_threads[hv] = p -> next;
414 prev -> next = p -> next;
416 # ifdef GC_DARWIN_THREADS
417 mach_port_deallocate(mach_task_self(), p->stop_info.mach_thread);
422 /* If a thread has been joined, but we have not yet */
423 /* been notified, then there may be more than one thread */
424 /* in the table with the same pthread id. */
425 /* This is OK, but we need a way to delete a specific one. */
/* Unlink the specific table entry gc_id from the bucket selected by */
/* its own id field. Caller must hold the allocation lock. */
426 void GC_delete_gc_thread(GC_thread gc_id)
428 pthread_t id = gc_id -> id;
429 int hv = NUMERIC_THREAD_ID(id) % THREAD_TABLE_SZ;
430 register GC_thread p = GC_threads[hv];
431 register GC_thread prev = 0;
433 GC_ASSERT(I_HOLD_LOCK());
/* NOTE(review): the search loop and the condition selecting one of */
/* the two unlink assignments below are not visible here; presumably */
/* the loop compares p with gc_id by pointer - confirm. */
439 GC_threads[hv] = p -> next;
441 prev -> next = p -> next;
443 # ifdef GC_DARWIN_THREADS
444 mach_port_deallocate(mach_task_self(), p->stop_info.mach_thread);
449 /* Return a GC_thread corresponding to a given pthread_t. */
450 /* Returns 0 if it's not there. */
451 /* Caller holds allocation lock or otherwise inhibits */
453 /* If there is more than one thread with the given id we */
454 /* return the most recent one. */
/* Find the table entry for id by walking the bucket that id hashes to. */
455 GC_thread GC_lookup_thread(pthread_t id)
457 int hv = NUMERIC_THREAD_ID(id) % THREAD_TABLE_SZ;
458 register GC_thread p = GC_threads[hv];
/* Stop at the first entry with a matching id, or at the chain's end */
/* (p == 0). New entries are inserted at the head by GC_new_thread, */
/* so the first match is the most recently added one. */
460 while (p != 0 && !THREAD_EQUAL(p -> id, id)) p = p -> next;
465 /* Remove all entries from the GC_threads table, except the */
466 /* one for the current thread. We need to do this in the child */
467 /* process after a fork(), since only the current thread */
468 /* survives in the child. */
/* Walk every bucket of GC_threads and dispose of the entries that do */
/* not belong to the calling thread. */
469 void GC_remove_all_threads_but_me(void)
471 pthread_t self = pthread_self();
473 GC_thread p, next, me;
475 for (hv = 0; hv < THREAD_TABLE_SZ; ++hv) {
/* The loop advances through next, not p -> next, so p may be freed */
/* inside the body. */
477 for (p = GC_threads[hv]; 0 != p; p = next) {
/* NOTE(review): the body of this match branch and the else that */
/* presumably guards the cleanup below are not visible - confirm. */
479 if (THREAD_EQUAL(p -> id, self)) {
483 # ifdef THREAD_LOCAL_ALLOC
/* Thread-local free lists are destroyed only for entries that are */
/* not flagged FINISHED. */
484 if (!(p -> flags & FINISHED)) {
485 GC_destroy_thread_local(&(p->tlfs));
487 # endif /* THREAD_LOCAL_ALLOC */
/* first_thread is static storage and must never be freed. */
488 if (p != &first_thread) GC_INTERNAL_FREE(p);
494 #endif /* HANDLE_FORK */
496 #ifdef USE_PROC_FOR_LIBRARIES
/* Report whether the address range delimited by lo and hi contains a */
/* recorded marker stack pointer or the stack_end of a registered */
/* thread. Caller must hold the allocation lock. */
497 GC_bool GC_segment_is_thread_stack(ptr_t lo, ptr_t hi)
502 GC_ASSERT(I_HOLD_LOCK());
503 # ifdef PARALLEL_MARK
/* NOTE(review): the two marker tests join their comparisons with */
/* bitwise '&'. Each comparison yields 0 or 1, so the outcome equals */
/* '&&', but '&&' would match the stack_end tests below. */
504 for (i = 0; i < GC_markers; ++i) {
505 if (marker_sp[i] > lo & marker_sp[i] < hi) return TRUE;
507 if (marker_bsp[i] > lo & marker_bsp[i] < hi) return TRUE;
511 for (i = 0; i < THREAD_TABLE_SZ; i++) {
512 for (p = GC_threads[i]; p != 0; p = p -> next) {
/* Entries with no stack_end recorded are skipped. */
513 if (0 != p -> stack_end) {
/* Which bound is inclusive depends on the stack growth direction. */
514 # ifdef STACK_GROWS_UP
515 if (p -> stack_end >= lo && p -> stack_end < hi) return TRUE;
516 # else /* STACK_GROWS_DOWN */
517 if (p -> stack_end > lo && p -> stack_end <= hi) return TRUE;
524 #endif /* USE_PROC_FOR_LIBRARIES */
527 /* Find the largest stack_base smaller than bound. May be used */
528 /* to find the boundary between a register stack and adjacent */
529 /* immediately preceding memory stack. */
/* Scan the recorded marker stack pointers (with PARALLEL_MARK) and */
/* the stack_end of every registered thread, keeping in result the */
/* largest address that is strictly below bound. */
/* Caller must hold the allocation lock. */
530 ptr_t GC_greatest_stack_base_below(ptr_t bound)
536 GC_ASSERT(I_HOLD_LOCK());
537 # ifdef PARALLEL_MARK
538 for (i = 0; i < GC_markers; ++i) {
539 if (marker_sp[i] > result && marker_sp[i] < bound)
540 result = marker_sp[i];
543 for (i = 0; i < THREAD_TABLE_SZ; i++) {
544 for (p = GC_threads[i]; p != 0; p = p -> next) {
545 if (p -> stack_end > result && p -> stack_end < bound) {
546 result = p -> stack_end;
554 #ifdef GC_LINUX_THREADS
555 /* Return the number of processors, or a value <= 0 if it can't be determined. */
556 int GC_get_nprocs(void)
558 /* Should be "return sysconf(_SC_NPROCESSORS_ONLN);" but that */
559 /* appears to be buggy in many cases. */
560 /* We look for lines "cpu<n>" in /proc/stat. */
561 # define STAT_BUF_SIZE 4096
562 # define STAT_READ read
563 /* If read is wrapped, this may need to be redefined to call */
565 char stat_buf[STAT_BUF_SIZE];
568 /* Some old kernels only have a single "cpu nnnn ..." */
569 /* entry in /proc/stat. We identify those as */
/* A failed open or a read of fewer than 100 bytes is treated as */
/* "could not read". */
573 f = open("/proc/stat", O_RDONLY);
574 if (f < 0 || (len = STAT_READ(f, stat_buf, STAT_BUF_SIZE)) < 100) {
575 WARN("Couldn't read /proc/stat\n", 0);
/* Scan for "\ncpu" and parse the number after it; result ends up */
/* one above the largest CPU number seen. The scan stops 100 bytes */
/* before the end of the data that was read. */
578 for (i = 0; i < len - 100; ++i) {
579 if (stat_buf[i] == '\n' && stat_buf[i+1] == 'c'
580 && stat_buf[i+2] == 'p' && stat_buf[i+3] == 'u') {
581 int cpu_no = atoi(stat_buf + i + 4);
582 if (cpu_no >= result) result = cpu_no + 1;
588 #endif /* GC_LINUX_THREADS */
590 /* We hold the GC lock. Wait until an in-progress GC has finished. */
591 /* Repeatedly RELEASES GC LOCK in order to wait. */
592 /* If wait_for_all is true, then we exit with the GC lock held and no */
593 /* collection in progress; otherwise we just wait for the current GC */
595 extern GC_bool GC_collection_in_progress(void);
/* If an incremental collection is in progress, advance it with */
/* GC_collect_a_little_inner() until it finishes. With wait_for_all */
/* false the loop also ends as soon as GC_gc_no changes. */
/* Caller must hold the allocation lock. */
596 void GC_wait_for_gc_completion(GC_bool wait_for_all)
598 GC_ASSERT(I_HOLD_LOCK());
599 if (GC_incremental && GC_collection_in_progress()) {
/* Snapshot the collection number so a completed cycle is detected. */
600 int old_gc_no = GC_gc_no;
602 /* Make sure that no part of our stack is still on the mark stack, */
603 /* since it's about to be unmapped. */
604 while (GC_incremental && GC_collection_in_progress()
605 && (wait_for_all || old_gc_no == GC_gc_no)) {
/* GC_in_thread_creation is set only around the collection step. */
607 GC_in_thread_creation = TRUE;
608 GC_collect_a_little_inner(1);
609 GC_in_thread_creation = FALSE;
619 /* Procedures called before and after a fork. The goal here is to make */
620 /* it safe to call GC_malloc() in a forked child. It's unclear that is */
621 /* attainable, since the single UNIX spec seems to imply that one */
622 /* should only call async-signal-safe functions, and we probably can't */
623 /* quite guarantee that. But we give it our best shot. (That same */
624 /* spec also implies that it's not safe to call the system malloc */
625 /* between fork() and exec(). Thus we're doing no worse than it.) */
627 /* Called before a fork() */
/* Registered as the prepare handler by the pthread_atfork() call in */
/* GC_thr_init. */
628 void GC_fork_prepare_proc(void)
630 /* Acquire all relevant locks, so that after releasing the locks */
631 /* the child will see a consistent state in which monitor */
632 /* invariants hold. Unfortunately, we can't acquire libc locks */
633 /* we might need, and there seems to be no guarantee that libc */
634 /* must install a suitable fork handler. */
635 /* Wait for an ongoing GC to finish, since we can't finish it in */
636 /* the (one remaining thread in) the child. */
638 # if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
639 GC_wait_for_reclaim();
/* wait_for_all is TRUE here: wait until no collection is in progress. */
641 GC_wait_for_gc_completion(TRUE);
642 # if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
/* Released again by GC_fork_parent_proc and GC_fork_child_proc. */
643 GC_acquire_mark_lock();
647 /* Called in parent after a fork() */
648 void GC_fork_parent_proc(void)
650 # if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
/* Undo the GC_acquire_mark_lock() done in GC_fork_prepare_proc. */
651 GC_release_mark_lock();
656 /* Called in child after a fork() */
657 void GC_fork_child_proc(void)
659 /* Clean up the thread table, so that just our thread is left. */
660 # if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
/* Undo the GC_acquire_mark_lock() done in GC_fork_prepare_proc. */
661 GC_release_mark_lock();
663 GC_remove_all_threads_but_me();
664 # ifdef PARALLEL_MARK
665 /* Turn off parallel marking in the child, since we are probably */
666 /* just going to exec, and we would have to restart mark threads. */
669 # endif /* PARALLEL_MARK */
672 #endif /* HANDLE_FORK */
674 #if defined(GC_DGUX386_THREADS)
675 /* Return the number of processors, or a value <= 0 if it can't be determined. */
676 int GC_get_nprocs(void)
678 /* <takis@XFree86.Org> */
680 struct dg_sys_info_pm_info pm_sysinfo;
/* Ask the system for its processor-management information record. */
683 status = dg_sys_info((long int *) &pm_sysinfo,
684 DG_SYS_INFO_PM_INFO_TYPE, DG_SYS_INFO_PM_CURRENT_VERSION);
686 /* set -1 for error */
/* NOTE(review): the count is taken from idle_vp_count; whether that */
/* field means all processors or only idle ones is not established */
/* by this file - confirm against the DG/UX documentation. */
690 numCpus = pm_sysinfo.idle_vp_count;
692 # ifdef DEBUG_THREADS
693 GC_printf("Number of active CPUs in this system: %d\n", numCpus);
697 #endif /* GC_DGUX386_THREADS */
699 #if defined(GC_NETBSD_THREADS)
/* Query the CTL_HW / HW_NCPU sysctl value into res. */
700 static int get_ncpu(void)
702 int mib[] = {CTL_HW,HW_NCPU};
704 size_t len = sizeof(res);
/* NOTE(review): the return value of sysctl() is discarded, so a */
/* failed query is not detected on this line. */
706 sysctl(mib, sizeof(mib)/sizeof(int), &res, &len, NULL, 0);
709 #endif /* GC_NETBSD_THREADS */
711 # if defined(GC_LINUX_THREADS) && defined(INCLUDE_LINUX_THREAD_DESCR)
712 __thread int dummy_thread_local;
715 /* We hold the allocation lock. */
/* One-time thread-support initialization: registers the calling */
/* thread in the thread table, determines GC_nprocs and, with */
/* PARALLEL_MARK, GC_markers, and starts the marker helper threads */
/* when GC_parallel is set. */
716 void GC_thr_init(void)
718 # ifndef GC_DARWIN_THREADS
/* Later calls return at once. */
723 if (GC_thr_initialized) return;
724 GC_thr_initialized = TRUE;
727 /* Prepare for a possible fork. */
728 pthread_atfork(GC_fork_prepare_proc, GC_fork_parent_proc,
730 # endif /* HANDLE_FORK */
731 # if defined(INCLUDE_LINUX_THREAD_DESCR)
732 /* Explicitly register the region including the address */
733 /* of a thread local variable. This should include thread */
734 /* locals for the main thread, except for those allocated */
735 /* in response to dlopen calls. */
737 ptr_t thread_local_addr = (ptr_t)(&dummy_thread_local);
738 ptr_t main_thread_start, main_thread_end;
739 if (!GC_enclosing_mapping(thread_local_addr, &main_thread_start,
741 ABORT("Failed to find mapping for main thread thread locals");
743 GC_add_roots_inner(main_thread_start, main_thread_end, FALSE);
746 /* Add the initial thread, so we can stop it. */
747 t = GC_new_thread(pthread_self());
748 # ifdef GC_DARWIN_THREADS
749 t -> stop_info.mach_thread = mach_thread_self();
/* The address of a local stands in for the current stack pointer. */
751 t -> stop_info.stack_ptr = (ptr_t)(&dummy);
753 t -> flags = DETACHED | MAIN_THREAD;
/* The GC_NPROCS environment variable, when set, supplies GC_nprocs. */
759 char * nprocs_string = GETENV("GC_NPROCS");
761 if (nprocs_string != NULL) GC_nprocs = atoi(nprocs_string);
/* Otherwise, or if that value was not positive, ask the platform. */
763 if (GC_nprocs <= 0) {
764 # if defined(GC_HPUX_THREADS)
765 GC_nprocs = pthread_num_processors_np();
767 # if defined(GC_OSF1_THREADS) || defined(GC_AIX_THREADS) \
768 || defined(GC_SOLARIS_THREADS) || defined(GC_GNU_THREADS)
769 GC_nprocs = sysconf(_SC_NPROCESSORS_ONLN);
770 if (GC_nprocs <= 0) GC_nprocs = 1;
772 # if defined(GC_IRIX_THREADS)
773 GC_nprocs = sysconf(_SC_NPROC_ONLN);
774 if (GC_nprocs <= 0) GC_nprocs = 1;
776 # if defined(GC_NETBSD_THREADS)
777 GC_nprocs = get_ncpu();
779 # if defined(GC_DARWIN_THREADS) || defined(GC_FREEBSD_THREADS)
781 size_t len = sizeof(ncpus);
782 sysctl((int[2]) {CTL_HW, HW_NCPU}, 2, &ncpus, &len, NULL, 0);
785 # if defined(GC_LINUX_THREADS) || defined(GC_DGUX386_THREADS)
786 GC_nprocs = GC_get_nprocs();
/* Still not positive: warn about the value. */
789 if (GC_nprocs <= 0) {
790 WARN("GC_get_nprocs() returned %ld\n", GC_nprocs);
792 # ifdef PARALLEL_MARK
796 # ifdef PARALLEL_MARK
/* GC_MARKERS, when set, supplies the marker count; the other */
/* assignment visible below uses GC_nprocs. */
798 char * markers_string = GETENV("GC_MARKERS");
799 if (markers_string != NULL) {
800 GC_markers = atoi(markers_string);
802 GC_markers = GC_nprocs;
807 # ifdef PARALLEL_MARK
808 if (GC_print_stats) {
809 GC_log_printf("Number of processors = %ld, "
810 "number of marker threads = %ld\n", GC_nprocs, GC_markers);
812 if (GC_markers == 1) {
814 if (GC_print_stats) {
816 "Single marker thread, turning off parallel marking\n");
820 /* Disable true incremental collection, but generational is OK. */
821 GC_time_limit = GC_TIME_UNLIMITED;
823 /* If we are using a parallel marker, actually start helper threads. */
824 if (GC_parallel) start_mark_threads();
829 /* Perform all initializations, including those that */
830 /* may require allocation. */
831 /* Called without allocation lock. */
832 /* Must be called before a second thread is created. */
833 /* Did we say it's called without the allocation lock? */
/* One-time setup guarded by parallel_initialized: runs GC_init() if */
/* the collector is not yet initialized and, with THREAD_LOCAL_ALLOC, */
/* initializes the calling thread's thread-local free lists. */
834 void GC_init_parallel(void)
836 if (parallel_initialized) return;
837 parallel_initialized = TRUE;
839 /* GC_init() calls us back, so set flag first. */
840 if (!GC_is_initialized) GC_init();
841 /* Initialize thread local free lists if used. */
842 # if defined(THREAD_LOCAL_ALLOC)
/* The lookup result is dereferenced without a null test, so the */
/* calling thread must already be in the thread table. */
844 GC_init_thread_local(&(GC_lookup_thread(pthread_self())->tlfs));
850 #if !defined(GC_DARWIN_THREADS)
/* Wrapper for pthread_sigmask: SIG_SUSPEND is removed from a working */
/* copy of the mask on SIG_BLOCK / SIG_SETMASK requests, then the real */
/* pthread_sigmask is called and its result returned. */
851 int WRAP_FUNC(pthread_sigmask)(int how, const sigset_t *set, sigset_t *oset)
/* NOTE(review): the statements that copy *set into fudged_set and */
/* redirect set to that copy are not visible here - confirm. */
856 if (set != NULL && (how == SIG_BLOCK || how == SIG_SETMASK)) {
858 sigdelset(&fudged_set, SIG_SUSPEND);
861 return(REAL_FUNC(pthread_sigmask)(how, set, oset));
863 #endif /* !GC_DARWIN_THREADS */
865 /* Wrapper for functions that are likely to block for an appreciable */
866 /* length of time. */
868 struct blocking_data {
/* Worker for GC_do_blocking. Records the current stack position in */
/* the calling thread's table entry, sets thread_blocked, and clears */
/* it again after re-acquiring the allocation lock. data points to a */
/* struct blocking_data; context is not used in the visible code. */
/* NOTE(review): the call of the client function (presumably made */
/* while thread_blocked is set) falls on lines not visible here. */
873 static void GC_do_blocking_inner(ptr_t data, void * context) {
874 struct blocking_data * d = (struct blocking_data *) data;
877 me = GC_lookup_thread(pthread_self());
878 GC_ASSERT(!(me -> thread_blocked));
880 me -> stop_info.stack_ptr = GC_save_regs_in_stack();
881 # elif !defined(GC_DARWIN_THREADS)
882 me -> stop_info.stack_ptr = GC_approx_sp();
885 me -> backing_store_ptr = GC_save_regs_in_stack();
887 me -> thread_blocked = TRUE;
888 /* Save context here if we want to support precise stack marking */
891 LOCK(); /* This will block if the world is stopped. */
892 me -> thread_blocked = FALSE;
/* Run GC_do_blocking_inner through GC_with_callee_saves_pushed, */
/* passing it the address of a local struct blocking_data. */
/* NOTE(review): the statements storing fn and arg into my_data are */
/* not visible here - confirm. */
896 void GC_do_blocking(void (*fn)(void *), void *arg) {
897 struct blocking_data my_data;
901 GC_with_callee_saves_pushed(GC_do_blocking_inner, (ptr_t)(&my_data));
905 void *(*start_routine)(void *);
908 sem_t registered; /* 1 ==> in our thread table, but */
909 /* parent hasn't yet noticed. */
/* Remove the calling thread from the collector's bookkeeping. A */
/* thread flagged DETACHED has its table entry deleted; for other */
/* threads the visible code sets the FINISHED flag on the entry. */
912 int GC_unregister_my_thread(void)
917 /* Wait for any GC that may be marking from our stack to */
918 /* complete before we remove this thread. */
919 GC_wait_for_gc_completion(FALSE);
920 me = GC_lookup_thread(pthread_self());
921 # if defined(THREAD_LOCAL_ALLOC)
922 GC_destroy_thread_local(&(me->tlfs));
924 if (me -> flags & DETACHED) {
925 GC_delete_thread(pthread_self());
/* The pthread_join and pthread_detach wrappers in this file act on */
/* entries flagged FINISHED. */
927 me -> flags |= FINISHED;
929 # if defined(THREAD_LOCAL_ALLOC)
930 GC_remove_specific(GC_thread_key);
936 /* Called at thread exit. */
937 /* Never called for main thread. That's OK, since it */
938 /* results in at most a tiny one-time leak. And */
939 /* linuxthreads doesn't reclaim the main threads */
940 /* resources or id anyway. */
/* Installed with pthread_cleanup_push() in GC_inner_start_routine. */
/* It unregisters the calling thread; arg is not used in the visible */
/* code. */
941 void GC_thread_exit_proc(void *arg)
943 GC_unregister_my_thread();
/* Wrapper for pthread_join: looks up the collector's entry for */
/* thread before joining, calls the real pthread_join, and afterwards */
/* deletes that entry from the thread table. */
946 int WRAP_FUNC(pthread_join)(pthread_t thread, void **retval)
949 GC_thread thread_gc_id;
953 thread_gc_id = GC_lookup_thread(thread);
954 /* This is guaranteed to be the intended one, since the thread id */
955 /* can't have been recycled by pthreads. */
957 result = REAL_FUNC(pthread_join)(thread, retval);
958 # if defined (GC_FREEBSD_THREADS)
959 /* On FreeBSD, the wrapped pthread_join() sometimes returns (what
960 appears to be) a spurious EINTR which caused the test and real code
961 to gratuitously fail. Having looked at system pthread library source
962 code, I see how this return code may be generated. In one path of
963 code, pthread_join() just returns the errno setting of the thread
964 being joined. This does not match the POSIX specification or the
965 local man pages thus I have taken the liberty to catch this one
966 spurious return value properly conditionalized on GC_FREEBSD_THREADS. */
967 if (result == EINTR) result = 0;
971 /* Here the pthread thread id may have been recycled. */
/* Delete by entry pointer, not by pthread id, for that reason. */
/* NOTE(review): the condition guarding this deletion (presumably */
/* result == 0) is not visible here - confirm. */
972 GC_delete_gc_thread(thread_gc_id);
/* Wrapper for pthread_detach: calls the real pthread_detach, flags */
/* the collector's entry for thread as DETACHED, and deletes the entry */
/* if the thread is already flagged FINISHED. */
979 WRAP_FUNC(pthread_detach)(pthread_t thread)
982 GC_thread thread_gc_id;
/* Look the entry up before detaching. */
986 thread_gc_id = GC_lookup_thread(thread);
988 result = REAL_FUNC(pthread_detach)(thread);
991 thread_gc_id -> flags |= DETACHED;
992 /* Here the pthread thread id may have been recycled. */
993 if (thread_gc_id -> flags & FINISHED) {
994 GC_delete_gc_thread(thread_gc_id);
1001 GC_bool GC_in_thread_creation = FALSE; /* Protected by allocation lock. */
/* Create a thread table entry for my_pthread and fill in its stack */
/* bounds from sb: mem_base becomes both the initial stack pointer and */
/* stack_end, reg_base becomes backing_store_end. */
1003 GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb,
1004 pthread_t my_pthread)
1008 GC_in_thread_creation = TRUE; /* OK to collect from unknown thread. */
1009 me = GC_new_thread(my_pthread);
1010 GC_in_thread_creation = FALSE;
/* NOTE(review): GC_new_thread can return 0, and no null test on me */
/* is visible before the stores below. */
1011 # ifdef GC_DARWIN_THREADS
1012 me -> stop_info.mach_thread = mach_thread_self();
1014 me -> stop_info.stack_ptr = sb -> mem_base;
1016 me -> stack_end = sb -> mem_base;
1018 me -> backing_store_end = sb -> reg_base;
/* Register the calling thread with the collector using the stack */
/* bounds in sb. A newly registered thread is flagged DETACHED. */
/* GC_DUPLICATE is one of the return values. */
1023 int GC_register_my_thread(struct GC_stack_base *sb)
1025 pthread_t my_pthread = pthread_self();
/* NOTE(review): the test on the lookup result that selects between */
/* registering and returning GC_DUPLICATE is not visible - confirm. */
1029 me = GC_lookup_thread(my_pthread);
1031 me = GC_register_my_thread_inner(sb, my_pthread);
1032 me -> flags |= DETACHED;
1033 /* Treat as detached, since we do not need to worry about */
1034 /* pointer results. */
1039 return GC_DUPLICATE;
/* Body of a thread created through the pthread_create wrapper. */
/* Registers the thread using sb, copies the start routine, its */
/* argument and the flags out of the start_info passed in arg, posts */
/* the registered semaphore, runs the client's start routine and */
/* stores its result in the thread's entry. */
1043 void * GC_inner_start_routine(struct GC_stack_base *sb, void * arg)
1045 struct start_info * si = arg;
1048 pthread_t my_pthread;
1049 void *(*start)(void *);
1052 my_pthread = pthread_self();
1053 # ifdef DEBUG_THREADS
1054 GC_printf("Starting thread 0x%x\n", (unsigned)my_pthread);
1055 GC_printf("pid = %ld\n", (long) getpid());
1056 GC_printf("sp = 0x%lx\n", (long) &arg);
1059 me = GC_register_my_thread_inner(sb, my_pthread);
1060 me -> flags = si -> flags;
/* Copy everything needed out of si before posting the semaphore. */
1062 start = si -> start_routine;
1063 # ifdef DEBUG_THREADS
1064 GC_printf("start_routine = %p\n", (void *)start);
1066 start_arg = si -> arg;
1067 sem_post(&(si -> registered)); /* Last action on si. */
1068 /* OK to deallocate. */
/* Install the handler that unregisters this thread. */
1069 pthread_cleanup_push(GC_thread_exit_proc, 0);
1070 # if defined(THREAD_LOCAL_ALLOC)
1072 GC_init_thread_local(&(me->tlfs));
1075 result = (*start)(start_arg);
1077 GC_printf("Finishing thread 0x%x\n", (unsigned)pthread_self());
1079 me -> status = result;
/* A nonzero argument pops the handler and runs it. */
1080 pthread_cleanup_pop(1);
1081 /* Cleanup acquires lock, ensuring that we can't exit */
1082 /* while a collection that thinks we're alive is trying to stop */
/* Start routine handed to the real pthread_create by the wrapper. */
/* It runs GC_inner_start_routine with a stack base, either obtained */
/* directly with GC_get_stack_base() (INCLUDE_LINUX_THREAD_DESCR) or */
/* through GC_call_with_stack_base(). arg is the start_info pointer. */
1087 void * GC_start_routine(void * arg)
1089 # ifdef INCLUDE_LINUX_THREAD_DESCR
1090 struct GC_stack_base sb;
1092 # ifdef REDIRECT_MALLOC
1093 /* GC_get_stack_base may call pthread_getattr_np, which can */
1094 /* unfortunately call realloc, which may allocate from an */
1095 /* unregistered thread. This is unpleasant, since it might */
1096 /* force heap growth. */
1099 if (GC_get_stack_base(&sb) != GC_SUCCESS)
1100 ABORT("Failed to get thread stack base.");
1101 # ifdef REDIRECT_MALLOC
1104 return GC_inner_start_routine(&sb, arg);
1106 return GC_call_with_stack_base(GC_inner_start_routine, arg);
/* Wrapper for pthread_create. Allocates a start_info record holding */
/* the client's start routine and flags, starts the thread on */
/* GC_start_routine, then waits on the record's semaphore until the */
/* child has registered itself before freeing the record. */
/* Returns ENOMEM if the record cannot be allocated. */
1111 WRAP_FUNC(pthread_create)(pthread_t *new_thread,
1112 const pthread_attr_t *attr,
1113 void *(*start_routine)(void *), void *arg)
1118 struct start_info * si;
1119 /* This is otherwise saved only in an area mmapped by the thread */
1120 /* library, which isn't visible to the collector. */
1122 /* We resist the temptation to muck with the stack size here, */
1123 /* even if the default is unreasonably small. That's the client's */
1124 /* responsibility. */
1128 si = (struct start_info *)GC_INTERNAL_MALLOC(sizeof(struct start_info),
1131 if (!parallel_initialized) GC_init_parallel();
1132 if (0 == si) return(ENOMEM);
/* The semaphore starts at 0; the child posts it once registered. */
1133 sem_init(&(si -> registered), 0, 0);
1134 si -> start_routine = start_routine;
1137 if (!GC_thr_initialized) GC_thr_init();
1138 # ifdef GC_ASSERTIONS
/* Assertion-only check that the requested stack is large enough. */
1140 size_t stack_size = 0;
1142 pthread_attr_getstacksize(attr, &stack_size);
/* No size obtained from attr: query a default-initialized attribute. */
1144 if (0 == stack_size) {
1145 pthread_attr_t my_attr;
1146 pthread_attr_init(&my_attr);
1147 pthread_attr_getstacksize(&my_attr, &stack_size);
1149 /* On Solaris 10, with default attr initialization, */
1150 /* stack_size remains 0. Fudge it. */
1151 if (0 == stack_size) {
1153 WARN("Failed to get stack size for assertion checking\n", 0);
1155 stack_size = 1000000;
1157 # ifdef PARALLEL_MARK
1158 GC_ASSERT(stack_size >= (8*HBLKSIZE*sizeof(word)));
1160 /* FreeBSD-5.3/Alpha: default pthread stack is 64K, */
1161 /* HBLKSIZE=8192, sizeof(word)=8 */
1162 GC_ASSERT(stack_size >= 65536);
1164 /* Our threads may need to do some work for the GC. */
1165 /* Ridiculously small threads won't work, and they */
1166 /* probably wouldn't work anyway. */
/* Record in the flags whether the thread is created detached. */
1170 detachstate = PTHREAD_CREATE_JOINABLE;
1172 pthread_attr_getdetachstate(attr, &detachstate);
1174 if (PTHREAD_CREATE_DETACHED == detachstate) my_flags |= DETACHED;
1175 si -> flags = my_flags;
1177 # ifdef DEBUG_THREADS
1178 GC_printf("About to start new thread from thread 0x%x\n",
1179 (unsigned)pthread_self());
1181 GC_need_to_lock = TRUE;
1183 result = REAL_FUNC(pthread_create)(new_thread, attr, GC_start_routine, si);
1185 # ifdef DEBUG_THREADS
1186 GC_printf("Started thread 0x%x\n", (unsigned)(*new_thread));
1188 /* Wait until child has been added to the thread table. */
1189 /* This also ensures that we hold onto si until the child is done */
1190 /* with it. Thus it doesn't matter whether it is otherwise */
1191 /* visible to the collector. */
/* Retry sem_wait() when it fails with EINTR; any other failure */
/* aborts. */
1193 while (0 != sem_wait(&(si -> registered))) {
1194 if (EINTR != errno) ABORT("sem_wait failed");
1197 sem_destroy(&(si -> registered));
1199 GC_INTERNAL_FREE(si);
1205 /* Spend a few cycles in a way that can't introduce contention with */
1206 /* other threads. */
/* NOTE(review): the signature of this routine is not present in this   */
/* listing; the SPIN_MAX comment refers to it as GC_pause -- confirm.    */
/* The loop below runs a fixed 10 iterations.                            */
1210 # if !defined(__GNUC__) || defined(__INTEL_COMPILER)
/* Scratch counter, declared only when not compiling with plain GCC.    */
1211 volatile word dummy = 0;
1214 for (i = 0; i < 10; ++i) {
1215 # if defined(__GNUC__) && !defined(__INTEL_COMPILER)
/* Empty volatile asm with a "memory" clobber: it emits no instructions */
/* but the compiler may not delete it or cache memory across it.        */
1216 __asm__ __volatile__ (" " : : : "memory");
1218 /* Something that's unlikely to be optimized away. */
/* SPIN_MAX is also the upper bound for pause_length in the backoff     */
/* loop of GC_generic_lock and the value of high_spin_max.              */
1224 #define SPIN_MAX 128 /* Maximum number of calls to GC_pause before */
/* GC_collecting is read by the lock acquisition code in this file to   */
/* decide whether spinning should be skipped.                           */
1227 volatile GC_bool GC_collecting = 0;
1228 /* A hint that we're in the collector and */
1229 /* holding the allocation lock for an */
1230 /* extended period. */
1232 #if !defined(USE_SPIN_LOCK) || defined(PARALLEL_MARK)
1233 /* If we don't want to use the below spinlock implementation, either */
1234 /* because we don't have a GC_test_and_set implementation, or because */
1235 /* we don't want to risk sleeping, we can still try spinning on */
1236 /* pthread_mutex_trylock for a while. This appears to be very */
1237 /* beneficial in many cases. */
1238 /* I suspect that under high contention this is nearly always better */
1239 /* than the spin lock. But it's a bit slower on a uniprocessor. */
1240 /* Hence we still default to the spin lock. */
1241 /* This is also used to acquire the mark lock for the parallel */
1244 /* Here we use a strict exponential backoff scheme. I don't know */
1245 /* whether that's better or worse than the above. We eventually */
1246 /* yield by calling pthread_mutex_lock(); it never makes sense to */
1247 /* explicitly sleep. */
/* Counters.  In the code visible here only GC_unlocked_count is        */
/* updated (after an immediate trylock success).  Presumably these are  */
/* statistics only -- TODO confirm where the other two are incremented. */
1251 unsigned long GC_spin_count = 0;
1252 unsigned long GC_block_count = 0;
1253 unsigned long GC_unlocked_count = 0;
/* Acquire the pthread mutex "lock".                                    */
/* Unless NO_PTHREAD_TRYLOCK is defined, the mutex is first polled with */
/* pthread_mutex_trylock: once up front, then inside a backoff loop in  */
/* which pause_length starts at 1 and doubles while it is <= SPIN_MAX.  */
/* An unexpected trylock result aborts.  The final fallback is a        */
/* blocking pthread_mutex_lock whose return value is not checked.       */
/* NOTE(review): the case labels and early returns are not present in   */
/* this listing -- confirm the exact flow against the full source.      */
1256 void GC_generic_lock(pthread_mutex_t * lock)
1258 #ifndef NO_PTHREAD_TRYLOCK
1259 unsigned pause_length = 1;
/* Uncontended attempt before any pausing.                              */
1262 if (0 == pthread_mutex_trylock(lock)) {
1264 ++GC_unlocked_count;
/* Exponential backoff: pause_length takes the values 1, 2, 4, ...      */
1268 for (; pause_length <= SPIN_MAX; pause_length <<= 1) {
1269 for (i = 0; i < pause_length; ++i) {
1272 switch(pthread_mutex_trylock(lock)) {
1281 ABORT("Unexpected error from pthread_mutex_trylock");
1284 #endif /* !NO_PTHREAD_TRYLOCK */
/* Polling is disabled or did not succeed: block until the mutex is     */
/* available.                                                           */
1288 pthread_mutex_lock(lock);
1291 #endif /* !USE_SPIN_LOCK || PARALLEL_MARK */
1293 #if defined(USE_SPIN_LOCK)
1295 /* Reasonably fast spin locks. Basically the same implementation */
1296 /* as STL alloc.h. This isn't really the right way to do this. */
1297 /* but until the POSIX scheduling mess gets straightened out ... */
/* Test-and-set word for this lock variant; it is acquired below with   */
/* AO_test_and_set_acquire and is free when that call returns           */
/* AO_TS_CLEAR.                                                         */
1299 volatile AO_TS_t GC_allocate_lock = 0;
/* NOTE(review): the header of the function that contains the code      */
/* below is not present in this listing.                                */
/* Outline of what is visible: one immediate test-and-set attempt; a    */
/* spin phase of at most my_spin_max iterations that is abandoned via   */
/* "goto yield" when GC_collecting is set or GC_nprocs == 1; after a    */
/* successful spin spin_max is raised to high_spin_max; otherwise       */
/* spin_max is reset to low_spin_max and further attempts follow, where */
/* attempts numbered >= SLEEP_THRESHOLD set ts.tv_nsec = 1 << i.        */
/* Presumably the latter is a nanosleep with growing delay -- confirm.  */
1304 # define low_spin_max 30 /* spin cycles if we suspect uniprocessor */
1305 # define high_spin_max SPIN_MAX /* spin cycles for multiprocessor */
/* spin_max and last_spins are static, i.e. they persist across calls;  */
/* they are copied into the my_* locals before use.  No synchronization */
/* of these two statics is visible in this listing.                     */
1306 static unsigned spin_max = low_spin_max;
1307 unsigned my_spin_max;
1308 static unsigned last_spins = 0;
1309 unsigned my_last_spins;
/* Fast path: a single attempt before any spinning.                     */
1312 if (AO_test_and_set_acquire(&GC_allocate_lock) == AO_TS_CLEAR) {
1315 my_spin_max = spin_max;
1316 my_last_spins = last_spins;
1317 for (i = 0; i < my_spin_max; i++) {
1318 if (GC_collecting || GC_nprocs == 1) goto yield;
1319 if (i < my_last_spins/2) {
1323 if (AO_test_and_set_acquire(&GC_allocate_lock) == AO_TS_CLEAR) {
1326 * Spinning worked. Thus we're probably not being scheduled
1327 * against the other process with which we were contending.
1328 * Thus it makes sense to spin longer the next time.
1331 spin_max = high_spin_max;
1335 /* We are probably being scheduled against the other process. Sleep. */
1336 spin_max = low_spin_max;
1339 if (AO_test_and_set_acquire(&GC_allocate_lock) == AO_TS_CLEAR) {
1342 # define SLEEP_THRESHOLD 12
1343 /* Under Linux very short sleeps tend to wait until */
1344 /* the current time quantum expires. On old Linux */
1345 /* kernels nanosleep(<= 2ms) just spins under Linux. */
1346 /* (Under 2.4, this happens only for real-time */
1347 /* processes.) We want to minimize both behaviors */
1349 if (i < SLEEP_THRESHOLD) {
1355 /* Don't wait for more than about 15msecs, even */
1356 /* under extreme contention. */
1358 ts.tv_nsec = 1 << i;
1364 #else /* !USE_SPIN_LOCK */
/* Mutex-based variant (function header not present in this listing).   */
1367 #ifndef NO_PTHREAD_TRYLOCK
/* With a single processor, or while GC_collecting is set, block on     */
/* GC_allocate_ml directly; otherwise go through GC_generic_lock.       */
1368 if (1 == GC_nprocs || GC_collecting) {
1369 pthread_mutex_lock(&GC_allocate_ml);
1371 GC_generic_lock(&GC_allocate_ml);
1373 #else /* !NO_PTHREAD_TRYLOCK */
1374 pthread_mutex_lock(&GC_allocate_ml);
1375 #endif /* !NO_PTHREAD_TRYLOCK */
1378 #endif /* !USE_SPIN_LOCK */
1380 #if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
1382 #ifdef GC_ASSERTIONS
/* Numeric id of the thread recorded as holder of the mark lock, or     */
/* NO_THREAD.  It is written by the mark lock routines in this file and */
/* checked by their GC_ASSERT calls.                                    */
1383 unsigned long GC_mark_lock_holder = NO_THREAD;
1387 /* Ugly workaround for a linux threads bug in the final versions */
1388 /* of glibc2.1. Pthread_mutex_trylock sets the mutex owner */
1389 /* field even when it fails to acquire the mutex. This causes */
1390 /* pthread_cond_wait to die. Remove for glibc2.2. */
1391 /* According to the man page, we should use */
1392 /* PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP, but that isn't actually */
/* NOTE(review): two definitions of mark_mutex follow; presumably they  */
/* are alternatives selected by a preprocessor conditional that is not  */
/* present in this listing -- confirm.                                  */
1394 static pthread_mutex_t mark_mutex =
1395 {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, {0, 0}};
1397 static pthread_mutex_t mark_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Condition variable used together with mark_mutex: waited on in       */
/* GC_wait_builder and broadcast in GC_notify_all_builder.              */
1400 static pthread_cond_t builder_cv = PTHREAD_COND_INITIALIZER;
/* Acquire the mark lock (mark_mutex) and, when GC_ASSERTIONS is        */
/* defined, record the calling thread in GC_mark_lock_holder.           */
/* NOTE(review): as listed, mark_mutex is locked twice -- directly via  */
/* pthread_mutex_lock and again via GC_generic_lock.  Presumably the    */
/* direct call is disabled by comment delimiters that are missing from  */
/* this listing; confirm, since relocking a default (non-recursive)     */
/* mutex from the same thread is not safe.                              */
1402 void GC_acquire_mark_lock(void)
1405 if (pthread_mutex_lock(&mark_mutex) != 0) {
1406 ABORT("pthread_mutex_lock failed");
1409 GC_generic_lock(&mark_mutex);
1410 # ifdef GC_ASSERTIONS
1411 GC_mark_lock_holder = NUMERIC_THREAD_ID(pthread_self());
/* Release the mark lock.  Asserts that the calling thread is the       */
/* recorded holder, resets GC_mark_lock_holder to NO_THREAD (only when  */
/* GC_ASSERTIONS is defined) and unlocks mark_mutex.  Aborts if         */
/* pthread_mutex_unlock reports an error.                               */
1415 void GC_release_mark_lock(void)
1417 GC_ASSERT(GC_mark_lock_holder == NUMERIC_THREAD_ID(pthread_self()));
1418 # ifdef GC_ASSERTIONS
1419 GC_mark_lock_holder = NO_THREAD;
1421 if (pthread_mutex_unlock(&mark_mutex) != 0) {
1422 ABORT("pthread_mutex_unlock failed");
1426 /* Collector must wait for freelist builders for 2 reasons: */
1427 /* 1) Mark bits may still be getting examined without lock. */
1428 /* 2) Partial free lists referenced only by locals may not be scanned */
1429 /* correctly, e.g. if they contain "pointer-free" objects, since the */
1430 /* free-list link may be ignored. */
/* Block on builder_cv using mark_mutex.  The caller must hold the mark */
/* lock (asserted).  pthread_cond_wait releases the mutex while waiting */
/* and reacquires it before returning, so the recorded holder is        */
/* cleared before the wait and set again afterwards.  Aborts if         */
/* pthread_cond_wait reports an error.  The function does not itself    */
/* re-check any condition after waking; see its callers for that.       */
1431 void GC_wait_builder(void)
1433 GC_ASSERT(GC_mark_lock_holder == NUMERIC_THREAD_ID(pthread_self()));
1434 # ifdef GC_ASSERTIONS
1435 GC_mark_lock_holder = NO_THREAD;
1437 if (pthread_cond_wait(&builder_cv, &mark_mutex) != 0) {
1438 ABORT("pthread_cond_wait failed");
1440 GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
1441 # ifdef GC_ASSERTIONS
1442 GC_mark_lock_holder = NUMERIC_THREAD_ID(pthread_self());
/* Take the mark lock, loop for as long as GC_fl_builder_count > 0,     */
/* then release the mark lock.                                          */
/* NOTE(review): the loop body is not present in this listing;          */
/* presumably it waits on the builder condition variable -- confirm.    */
1446 void GC_wait_for_reclaim(void)
1448 GC_acquire_mark_lock();
1449 while (GC_fl_builder_count > 0) {
1452 GC_release_mark_lock();
/* Wake every thread waiting on builder_cv.  Asserts that the caller    */
/* holds the mark lock; aborts if pthread_cond_broadcast reports an     */
/* error.                                                               */
1455 void GC_notify_all_builder(void)
1457 GC_ASSERT(GC_mark_lock_holder == NUMERIC_THREAD_ID(pthread_self()));
1458 if (pthread_cond_broadcast(&builder_cv) != 0) {
1459 ABORT("pthread_cond_broadcast failed");
1463 #endif /* PARALLEL_MARK || THREAD_LOCAL_ALLOC */
1465 #ifdef PARALLEL_MARK
/* Condition variable used together with mark_mutex: waited on in       */
/* GC_wait_marker and broadcast in GC_notify_all_marker.                */
1467 static pthread_cond_t mark_cv = PTHREAD_COND_INITIALIZER;
/* Block on mark_cv using mark_mutex.  The caller must hold the mark    */
/* lock (asserted).  The recorded holder is cleared before the wait,    */
/* because pthread_cond_wait releases the mutex while waiting, and is   */
/* set again once the wait returns with the mutex reacquired.  Aborts   */
/* if pthread_cond_wait reports an error.                               */
1469 void GC_wait_marker(void)
1471 GC_ASSERT(GC_mark_lock_holder == NUMERIC_THREAD_ID(pthread_self()));
1472 # ifdef GC_ASSERTIONS
1473 GC_mark_lock_holder = NO_THREAD;
1475 if (pthread_cond_wait(&mark_cv, &mark_mutex) != 0) {
1476 ABORT("pthread_cond_wait failed");
1478 GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
1479 # ifdef GC_ASSERTIONS
1480 GC_mark_lock_holder = NUMERIC_THREAD_ID(pthread_self());
/* Wake every thread waiting on mark_cv; aborts if                      */
/* pthread_cond_broadcast reports an error.  No assertion about the     */
/* mark lock holder is visible in this routine.                         */
1484 void GC_notify_all_marker(void)
1486 if (pthread_cond_broadcast(&mark_cv) != 0) {
1487 ABORT("pthread_cond_broadcast failed");
1491 #endif /* PARALLEL_MARK */
1493 # endif /* GC_LINUX_THREADS and friends */