src/mm/boehm-gc/pthread_support.c

   1 /*
   2  * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
   3  * Copyright (c) 1996 by Silicon Graphics.  All rights reserved.
   4  * Copyright (c) 1998 by Fergus Henderson.  All rights reserved.
   5  * Copyright (c) 2000-2005 by Hewlett-Packard Company.  All rights reserved.
   6  *
   7  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
   8  * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
   9  *
  10  * Permission is hereby granted to use or copy this program
  11  * for any purpose,  provided the above notices are retained on all copies.
  12  * Permission to modify the code and to distribute modified code is granted,
  13  * provided the above notices are retained, and a notice that the code was
  14  * modified is included with the above copyright notice.
  15  */
  16 /*
  17  * Support code originally for LinuxThreads, the clone()-based kernel
  18  * thread package for Linux which is included in libc6.
  19  *
  20  * This code no doubt makes some assumptions beyond what is
  21  * guaranteed by the pthread standard, though it now does
  22  * very little of that.  It now also supports NPTL, and many
  23  * other Posix thread implementations.  We are trying to merge
   24  * all flavors of pthread support code into this file.
  25  */
  26  /* DG/UX ix86 support <takis@xfree86.org> */
  27 /*
  28  * Linux_threads.c now also includes some code to support HPUX and
  29  * OSF1 (Compaq Tru64 Unix, really).  The OSF1 support is based on Eric Benson's
  30  * patch.
  31  *
  32  * Eric also suggested an alternate basis for a lock implementation in
  33  * his code:
  34  * + #elif defined(OSF1)
  35  * +    unsigned long GC_allocate_lock = 0;
  36  * +    msemaphore GC_allocate_semaphore;
  37  * + #  define GC_TRY_LOCK() \
  38  * +    ((msem_lock(&GC_allocate_semaphore, MSEM_IF_NOWAIT) == 0) \
  39  * +     ? (GC_allocate_lock = 1) \
  40  * +     : 0)
  41  * + #  define GC_LOCK_TAKEN GC_allocate_lock
  42  */
  43
  44 #include "config.h"
  45
  46 /*#define DEBUG_THREADS 1*/
  47
  48 # include "private/pthread_support.h"
  49
  50 # if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS)
  51
  52 # if defined(GC_DGUX386_THREADS) && !defined(_POSIX4A_DRAFT10_SOURCE)
  53 #   define _POSIX4A_DRAFT10_SOURCE 1
  54 # endif
  55
  56 # if defined(GC_DGUX386_THREADS) && !defined(_USING_POSIX4A_DRAFT10)
  57 #   define _USING_POSIX4A_DRAFT10 1
  58 # endif
  59
  60 # include <stdlib.h>
  61 # include <pthread.h>
  62 # include <sched.h>
  63 # include <time.h>
  64 # include <errno.h>
  65 # include <unistd.h>
  66 # include <sys/mman.h>
  67 # include <sys/time.h>
  68 # include <sys/types.h>
  69 # include <sys/stat.h>
  70 # include <fcntl.h>
  71 # include <signal.h>
  72
  73 # include "gc_inline.h"
  74
  75 #if defined(GC_DARWIN_THREADS)
  76 # include "private/darwin_semaphore.h"
  77 #else
  78 # include <semaphore.h>
  79 #endif /* !GC_DARWIN_THREADS */
  80
  81 #if defined(GC_DARWIN_THREADS) || defined(GC_FREEBSD_THREADS)
  82 # include <sys/sysctl.h>
  83 #endif /* GC_DARWIN_THREADS */
  84
  85 #if defined(GC_NETBSD_THREADS)
  86 # include <sys/param.h>
  87 # include <sys/sysctl.h>
  88 #endif        /* GC_NETBSD_THREADS */
  89
/* Allocator lock definitions.          */
/* The pthread mutex form of the lock is defined here only when         */
/* USE_SPIN_LOCK is not defined.                                        */
#if !defined(USE_SPIN_LOCK)
  pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
#endif
/* Starts out as NO_THREAD (presumably "lock not held" -- confirm       */
/* against the LOCK()/UNLOCK() definitions).                            */
unsigned long GC_lock_holder = NO_THREAD;
                /* Used only for assertions, and to prevent      */
                /* recursive reentry in the system call wrapper. */
/* DG/UX only: system headers (presumably the ones declaring            */
/* dg_sys_info(), which the DG/UX GC_get_nprocs() in this file calls)   */
/* plus a local definition of sem_t.                                    */
#if defined(GC_DGUX386_THREADS)
# include <sys/dg_sys_info.h>
# include <sys/_int_psem.h>
  /* sem_t is an uint in DG/UX */
  typedef unsigned int  sem_t;
#endif /* GC_DGUX386_THREADS */
 104
/* When the compiler is not GCC-compatible (__GNUC__ undefined), make   */
/* __inline__ expand to nothing so declarations using it still compile. */
#ifndef __GNUC__
#   define __inline__
#endif
 108
/* Undefine macros used to redirect pthread primitives. */
/* After this point the plain names are no longer rewritten by a       */
/* redirecting macro.  pthread_sigmask is left untouched on Darwin.    */
# undef pthread_create
# if !defined(GC_DARWIN_THREADS)
#   undef pthread_sigmask
# endif
# undef pthread_join
# undef pthread_detach
# if defined(GC_OSF1_THREADS) && defined(_PTHREAD_USE_MANGLED_NAMES_) \
     && !defined(_PTHREAD_USE_PTDNAM_)
  /* Restore the original mangled names on Tru64 UNIX.  */
  /* (The #undefs above removed them along with any redirection.)      */
#   define pthread_create __pthread_create
#   define pthread_join __pthread_join
#   define pthread_detach __pthread_detach
# endif
 123
/* WRAP_FUNC(f) names the wrapper this file defines for pthread         */
/* primitive f; REAL_FUNC(f) names the underlying implementation that   */
/* the wrapper forwards to.  Three configurations are supported:        */
/*   GC_USE_LD_WRAP:     __wrap_f / __real_f                            */
/*   GC_USE_DLOPEN_WRAP: f / GC_real_f (a function pointer filled in    */
/*                       by GC_init_real_syms())                        */
/*   otherwise:          GC_f / f (or __d10_f on DG/UX)                 */
#ifdef GC_USE_LD_WRAP
#   define WRAP_FUNC(f) __wrap_##f
#   define REAL_FUNC(f) __real_##f
#else
#   ifdef GC_USE_DLOPEN_WRAP
#     include <dlfcn.h>
#     define WRAP_FUNC(f) f
#     define REAL_FUNC(f) GC_real_##f
      /* We define both GC_f and plain f to be the wrapped function.    */
      /* In that way plain calls work, as do calls from files that      */
      /* included gc.h, which redefined f to GC_f.                      */
      /* FIXME: Needs work for DARWIN and Tru64 (OSF1) */
      /* Function-pointer types and slots for the real primitives.      */
      typedef int (* GC_pthread_create_t)(pthread_t *, const pthread_attr_t *,
                                          void * (*)(void *), void *);
      static GC_pthread_create_t GC_real_pthread_create;
      typedef int (* GC_pthread_sigmask_t)(int, const sigset_t *, sigset_t *);
      static GC_pthread_sigmask_t GC_real_pthread_sigmask;
      typedef int (* GC_pthread_join_t)(pthread_t, void **);
      static GC_pthread_join_t GC_real_pthread_join;
      typedef int (* GC_pthread_detach_t)(pthread_t);
      static GC_pthread_detach_t GC_real_pthread_detach;
#   else
#     define WRAP_FUNC(f) GC_##f
#     if !defined(GC_DGUX386_THREADS)
#       define REAL_FUNC(f) f
#     else /* GC_DGUX386_THREADS */
#       define REAL_FUNC(f) __d10_##f
#     endif /* GC_DGUX386_THREADS */
#   endif
#endif
 154
 155 #if defined(GC_USE_DL_WRAP) || defined(GC_USE_DLOPEN_WRAP)
 156 /* Define GC_ functions as aliases for the plain ones, which will       */
 157 /* be intercepted.  This allows files which include gc.h, and hence     */
 158 /* generate references to the GC_ symbols, to see the right symbols.    */
 159       int GC_pthread_create(pthread_t * t, const pthread_attr_t * a,
 160                          void * (* fn)(void *), void * arg) {
 161           return pthread_create(t, a, fn, arg);
 162       }
 163       int GC_pthread_sigmask(int how, const sigset_t *mask, sigset_t *old) {
 164           return pthread_sigmask(how, mask, old);
 165       }
 166       int GC_pthread_join(pthread_t t, void **res) {
 167           return pthread_join(t, res);
 168       }
 169       int GC_pthread_detach(pthread_t t) {
 170           return pthread_detach(t);
 171       }
 172 #endif /* Linker-based interception. */
 173
 174 #ifdef GC_USE_DLOPEN_WRAP
 175   static GC_bool GC_syms_initialized = FALSE;
 176
 177   void GC_init_real_syms(void)
 178   {
 179     void *dl_handle;
 180 #   define LIBPTHREAD_NAME "libpthread.so.0"
 181 #   define LIBPTHREAD_NAME_LEN 16 /* incl. trailing 0 */
 182     size_t len = LIBPTHREAD_NAME_LEN - 1;
 183     char namebuf[LIBPTHREAD_NAME_LEN];
 184     static char *libpthread_name = LIBPTHREAD_NAME;
 185
 186     if (GC_syms_initialized) return;
 187 #   ifdef RTLD_NEXT
 188       dl_handle = RTLD_NEXT;
 189 #   else
 190       dl_handle = dlopen(libpthread_name, RTLD_LAZY);
 191       if (NULL == dl_handle) {
 192         while (isdigit(libpthread_name[len-1])) --len;
 193         if (libpthread_name[len-1] == '.') --len;
 194         memcpy(namebuf, libpthread_name, len);
 195         namebuf[len] = '\0';
 196         dl_handle = dlopen(namebuf, RTLD_LAZY);
 197       }
 198       if (NULL == dl_handle) ABORT("Couldn't open libpthread\n");
 199 #   endif
 200     GC_real_pthread_create = (GC_pthread_create_t)
 201                                 dlsym(dl_handle, "pthread_create");
 202     GC_real_pthread_sigmask = (GC_pthread_sigmask_t)
 203                                 dlsym(dl_handle, "pthread_sigmask");
 204     GC_real_pthread_join = (GC_pthread_join_t)
 205                                 dlsym(dl_handle, "pthread_join");
 206     GC_real_pthread_detach = (GC_pthread_detach_t)
 207                                 dlsym(dl_handle, "pthread_detach");
 208     GC_syms_initialized = TRUE;
 209   }
 210
 211 # define INIT_REAL_SYMS() if (!GC_syms_initialized) GC_init_real_syms();
 212 #else
 213 # define INIT_REAL_SYMS()
 214 #endif
 215
/* Defined later in this file. */
void GC_thr_init(void);

/* Set by GC_init_parallel() on its first call so later calls return   */
/* immediately.                                                         */
static GC_bool parallel_initialized = FALSE;

GC_bool GC_need_to_lock = FALSE;

/* Defined later in this file. */
void GC_init_parallel(void);

/* Recomputed by GC_thr_init() from GC_NPROCS or a platform query.      */
long GC_nprocs = 1;     /* Number of processors.  We may not have       */
                        /* access to all of them, but this is as good   */
                        /* a guess as any ...                           */
 227
 228 #ifdef THREAD_LOCAL_ALLOC
 229 /* We must explicitly mark ptrfree and gcj free lists, since the free   */
 230 /* list links wouldn't otherwise be found.  We also set them in the     */
 231 /* normal free lists, since that involves touching less memory than if  */
 232 /* we scanned them normally.                                            */
 233 void GC_mark_thread_local_free_lists(void)
 234 {
 235     int i;
 236     GC_thread p;
 237
 238     for (i = 0; i < THREAD_TABLE_SZ; ++i) {
 239       for (p = GC_threads[i]; 0 != p; p = p -> next) {
 240         GC_mark_thread_local_fls_for(&(p->tlfs));
 241       }
 242     }
 243 }
 244
 245 #if defined(GC_ASSERTIONS)
 246     /* Check that all thread-local free-lists are completely marked.    */
 247     /* also check that thread-specific-data structures are marked.      */
 248     void GC_check_tls(void) {
 249         int i;
 250         GC_thread p;
 251
 252         for (i = 0; i < THREAD_TABLE_SZ; ++i) {
 253           for (p = GC_threads[i]; 0 != p; p = p -> next) {
 254             GC_check_tls_for(&(p->tlfs));
 255           }
 256         }
 257 #       if defined(USE_CUSTOM_SPECIFIC)
 258           if (GC_thread_key != 0)
 259             GC_check_tsd_marks(GC_thread_key);
 260 #       endif
 261     }
 262 #endif /* GC_ASSERTIONS */
 263
 264 #endif /* Thread_local_alloc */
 265
 266 #ifdef PARALLEL_MARK
 267
/* Upper bound on the number of markers; start_mark_threads() clamps    */
/* GC_markers to it.  May be overridden at build time.                  */
# ifndef MAX_MARKERS
#   define MAX_MARKERS 16
# endif

/* Per-marker stack pointer, recorded by GC_mark_thread() when the      */
/* helper starts; indexed by the id passed to that thread.              */
static ptr_t marker_sp[MAX_MARKERS] = {0};
#ifdef IA64
  /* Result of GC_save_regs_in_stack() for each marker, recorded at     */
  /* the same time.                                                     */
  static ptr_t marker_bsp[MAX_MARKERS] = {0};
#endif
 276
 277 void * GC_mark_thread(void * id)
 278 {
 279   word my_mark_no = 0;
 280
 281   marker_sp[(word)id] = GC_approx_sp();
 282 # ifdef IA64
 283     marker_bsp[(word)id] = GC_save_regs_in_stack();
 284 # endif
 285   for (;; ++my_mark_no) {
 286     /* GC_mark_no is passed only to allow GC_help_marker to terminate   */
 287     /* promptly.  This is important if it were called from the signal   */
 288     /* handler or from the GC lock acquisition code.  Under Linux, it's */
 289     /* not safe to call it from a signal handler, since it uses mutexes */
 290     /* and condition variables.  Since it is called only here, the      */
 291     /* argument is unnecessary.                                         */
 292     if (my_mark_no < GC_mark_no || my_mark_no > GC_mark_no + 2) {
 293         /* resynchronize if we get far off, e.g. because GC_mark_no     */
 294         /* wrapped.                                                     */
 295         my_mark_no = GC_mark_no;
 296     }
 297 #   ifdef DEBUG_THREADS
 298         GC_printf("Starting mark helper for mark number %lu\n", my_mark_no);
 299 #   endif
 300     GC_help_marker(my_mark_no);
 301   }
 302 }
 303
extern long GC_markers;         /* Number of mark threads we would      */
                                /* like to have.  Includes the          */
                                /* initiating thread.                   */

/* Thread ids of the helper threads created by start_mark_threads().    */
pthread_t GC_mark_threads[MAX_MARKERS];

/* The unwrapped pthread_create, so helpers bypass this file's wrapper. */
#define PTHREAD_CREATE REAL_FUNC(pthread_create)
 311
 312 static void start_mark_threads(void)
 313 {
 314     unsigned i;
 315     pthread_attr_t attr;
 316
 317     if (GC_markers > MAX_MARKERS) {
 318         WARN("Limiting number of mark threads\n", 0);
 319         GC_markers = MAX_MARKERS;
 320     }
 321     if (0 != pthread_attr_init(&attr)) ABORT("pthread_attr_init failed");
 322
 323     if (0 != pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED))
 324         ABORT("pthread_attr_setdetachstate failed");
 325
 326 #   if defined(HPUX) || defined(GC_DGUX386_THREADS)
 327       /* Default stack size is usually too small: fix it. */
 328       /* Otherwise marker threads or GC may run out of    */
 329       /* space.                                           */
 330 #     define MIN_STACK_SIZE (8*HBLKSIZE*sizeof(word))
 331       {
 332         size_t old_size;
 333         int code;
 334
 335         if (pthread_attr_getstacksize(&attr, &old_size) != 0)
 336           ABORT("pthread_attr_getstacksize failed\n");
 337         if (old_size < MIN_STACK_SIZE) {
 338           if (pthread_attr_setstacksize(&attr, MIN_STACK_SIZE) != 0)
 339                   ABORT("pthread_attr_setstacksize failed\n");
 340         }
 341       }
 342 #   endif /* HPUX || GC_DGUX386_THREADS */
 343     if (GC_print_stats) {
 344         GC_log_printf("Starting %ld marker threads\n", GC_markers - 1);
 345     }
 346     for (i = 0; i < GC_markers - 1; ++i) {
 347       if (0 != PTHREAD_CREATE(GC_mark_threads + i, &attr,
 348                               GC_mark_thread, (void *)(word)i)) {
 349         WARN("Marker thread creation failed, errno = %ld.\n", errno);
 350       }
 351     }
 352 }
 353
 354 #endif /* PARALLEL_MARK */
 355
 356 GC_bool GC_thr_initialized = FALSE;
 357
 358 volatile GC_thread GC_threads[THREAD_TABLE_SZ];
 359
 360 void GC_push_thread_structures(void)
 361 {
 362     GC_ASSERT(I_HOLD_LOCK());
 363     GC_push_all((ptr_t)(GC_threads), (ptr_t)(GC_threads)+sizeof(GC_threads));
 364 #   if defined(THREAD_LOCAL_ALLOC)
 365       GC_push_all((ptr_t)(&GC_thread_key),
 366           (ptr_t)(&GC_thread_key)+sizeof(&GC_thread_key));
 367 #   endif
 368 }
 369
 370 /* It may not be safe to allocate when we register the first thread.    */
 371 static struct GC_Thread_Rep first_thread;
 372
 373 /* Add a thread to GC_threads.  We assume it wasn't already there.      */
 374 /* Caller holds allocation lock.                                        */
 375 GC_thread GC_new_thread(pthread_t id)
 376 {
 377     int hv = NUMERIC_THREAD_ID(id) % THREAD_TABLE_SZ;
 378     GC_thread result;
 379     static GC_bool first_thread_used = FALSE;
 380
 381     GC_ASSERT(I_HOLD_LOCK());
 382     if (!first_thread_used) {
 383         result = &first_thread;
 384         first_thread_used = TRUE;
 385     } else {
 386         result = (struct GC_Thread_Rep *)
 387                  GC_INTERNAL_MALLOC(sizeof(struct GC_Thread_Rep), NORMAL);
 388         GC_ASSERT(result -> flags == 0);
 389     }
 390     if (result == 0) return(0);
 391     result -> id = id;
 392     result -> next = GC_threads[hv];
 393     GC_threads[hv] = result;
 394     GC_ASSERT(result -> flags == 0 && result -> thread_blocked == 0);
 395     return(result);
 396 }
 397
 398 /* Delete a thread from GC_threads.  We assume it is there.     */
 399 /* (The code intentionally traps if it wasn't.)                 */
 400 void GC_delete_thread(pthread_t id)
 401 {
 402     int hv = NUMERIC_THREAD_ID(id) % THREAD_TABLE_SZ;
 403     register GC_thread p = GC_threads[hv];
 404     register GC_thread prev = 0;
 405
 406     GC_ASSERT(I_HOLD_LOCK());
 407     while (!THREAD_EQUAL(p -> id, id)) {
 408         prev = p;
 409         p = p -> next;
 410     }
 411     if (prev == 0) {
 412         GC_threads[hv] = p -> next;
 413     } else {
 414         prev -> next = p -> next;
 415     }
 416 #   ifdef GC_DARWIN_THREADS
 417         mach_port_deallocate(mach_task_self(), p->stop_info.mach_thread);
 418 #   endif
 419     GC_INTERNAL_FREE(p);
 420 }
 421
 422 /* If a thread has been joined, but we have not yet             */
 423 /* been notified, then there may be more than one thread        */
 424 /* in the table with the same pthread id.                       */
 425 /* This is OK, but we need a way to delete a specific one.      */
 426 void GC_delete_gc_thread(GC_thread gc_id)
 427 {
 428     pthread_t id = gc_id -> id;
 429     int hv = NUMERIC_THREAD_ID(id) % THREAD_TABLE_SZ;
 430     register GC_thread p = GC_threads[hv];
 431     register GC_thread prev = 0;
 432
 433     GC_ASSERT(I_HOLD_LOCK());
 434     while (p != gc_id) {
 435         prev = p;
 436         p = p -> next;
 437     }
 438     if (prev == 0) {
 439         GC_threads[hv] = p -> next;
 440     } else {
 441         prev -> next = p -> next;
 442     }
 443 #   ifdef GC_DARWIN_THREADS
 444         mach_port_deallocate(mach_task_self(), p->stop_info.mach_thread);
 445 #   endif
 446     GC_INTERNAL_FREE(p);
 447 }
 448
 449 /* Return a GC_thread corresponding to a given pthread_t.       */
 450 /* Returns 0 if it's not there.                                 */
 451 /* Caller holds  allocation lock or otherwise inhibits          */
 452 /* updates.                                                     */
 453 /* If there is more than one thread with the given id we        */
 454 /* return the most recent one.                                  */
 455 GC_thread GC_lookup_thread(pthread_t id)
 456 {
 457     int hv = NUMERIC_THREAD_ID(id) % THREAD_TABLE_SZ;
 458     register GC_thread p = GC_threads[hv];
 459
 460     while (p != 0 && !THREAD_EQUAL(p -> id, id)) p = p -> next;
 461     return(p);
 462 }
 463
 464 #ifdef HANDLE_FORK
 465 /* Remove all entries from the GC_threads table, except the     */
 466 /* one for the current thread.  We need to do this in the child */
 467 /* process after a fork(), since only the current thread        */
 468 /* survives in the child.                                       */
 469 void GC_remove_all_threads_but_me(void)
 470 {
 471     pthread_t self = pthread_self();
 472     int hv;
 473     GC_thread p, next, me;
 474
 475     for (hv = 0; hv < THREAD_TABLE_SZ; ++hv) {
 476       me = 0;
 477       for (p = GC_threads[hv]; 0 != p; p = next) {
 478         next = p -> next;
 479         if (THREAD_EQUAL(p -> id, self)) {
 480           me = p;
 481           p -> next = 0;
 482         } else {
 483 #         ifdef THREAD_LOCAL_ALLOC
 484             if (!(p -> flags & FINISHED)) {
 485               GC_destroy_thread_local(&(p->tlfs));
 486             }
 487 #         endif /* THREAD_LOCAL_ALLOC */
 488           if (p != &first_thread) GC_INTERNAL_FREE(p);
 489         }
 490       }
 491       GC_threads[hv] = me;
 492     }
 493 }
 494 #endif /* HANDLE_FORK */
 495
 496 #ifdef USE_PROC_FOR_LIBRARIES
 497 GC_bool GC_segment_is_thread_stack(ptr_t lo, ptr_t hi)
 498 {
 499     int i;
 500     GC_thread p;
 501
 502     GC_ASSERT(I_HOLD_LOCK());
 503 #   ifdef PARALLEL_MARK
 504       for (i = 0; i < GC_markers; ++i) {
 505         if (marker_sp[i] > lo & marker_sp[i] < hi) return TRUE;
 506 #       ifdef IA64
 507           if (marker_bsp[i] > lo & marker_bsp[i] < hi) return TRUE;
 508 #       endif
 509       }
 510 #   endif
 511     for (i = 0; i < THREAD_TABLE_SZ; i++) {
 512       for (p = GC_threads[i]; p != 0; p = p -> next) {
 513         if (0 != p -> stack_end) {
 514 #         ifdef STACK_GROWS_UP
 515             if (p -> stack_end >= lo && p -> stack_end < hi) return TRUE;
 516 #         else /* STACK_GROWS_DOWN */
 517             if (p -> stack_end > lo && p -> stack_end <= hi) return TRUE;
 518 #         endif
 519         }
 520       }
 521     }
 522     return FALSE;
 523 }
 524 #endif /* USE_PROC_FOR_LIBRARIES */
 525
 526 #ifdef IA64
 527 /* Find the largest stack_base smaller than bound.  May be used */
 528 /* to find the boundary between a register stack and adjacent   */
 529 /* immediately preceding memory stack.                          */
 530 ptr_t GC_greatest_stack_base_below(ptr_t bound)
 531 {
 532     int i;
 533     GC_thread p;
 534     ptr_t result = 0;
 535
 536     GC_ASSERT(I_HOLD_LOCK());
 537 #   ifdef PARALLEL_MARK
 538       for (i = 0; i < GC_markers; ++i) {
 539         if (marker_sp[i] > result && marker_sp[i] < bound)
 540           result = marker_sp[i];
 541       }
 542 #   endif
 543     for (i = 0; i < THREAD_TABLE_SZ; i++) {
 544       for (p = GC_threads[i]; p != 0; p = p -> next) {
 545         if (p -> stack_end > result && p -> stack_end < bound) {
 546           result = p -> stack_end;
 547         }
 548       }
 549     }
 550     return result;
 551 }
 552 #endif /* IA64 */
 553
 554 #ifdef GC_LINUX_THREADS
 555 /* Return the number of processors, or i<= 0 if it can't be determined. */
 556 int GC_get_nprocs(void)
 557 {
 558     /* Should be "return sysconf(_SC_NPROCESSORS_ONLN);" but that       */
 559     /* appears to be buggy in many cases.                               */
 560     /* We look for lines "cpu<n>" in /proc/stat.                        */
 561 #   define STAT_BUF_SIZE 4096
 562 #   define STAT_READ read
 563         /* If read is wrapped, this may need to be redefined to call    */
 564         /* the real one.                                                */
 565     char stat_buf[STAT_BUF_SIZE];
 566     int f;
 567     word result = 1;
 568         /* Some old kernels only have a single "cpu nnnn ..."   */
 569         /* entry in /proc/stat.  We identify those as           */
 570         /* uniprocessors.                                       */
 571     size_t i, len = 0;
 572
 573     f = open("/proc/stat", O_RDONLY);
 574     if (f < 0 || (len = STAT_READ(f, stat_buf, STAT_BUF_SIZE)) < 100) {
 575         WARN("Couldn't read /proc/stat\n", 0);
 576         return -1;
 577     }
 578     for (i = 0; i < len - 100; ++i) {
 579         if (stat_buf[i] == '\n' && stat_buf[i+1] == 'c'
 580             && stat_buf[i+2] == 'p' && stat_buf[i+3] == 'u') {
 581             int cpu_no = atoi(stat_buf + i + 4);
 582             if (cpu_no >= result) result = cpu_no + 1;
 583         }
 584     }
 585     close(f);
 586     return result;
 587 }
 588 #endif /* GC_LINUX_THREADS */
 589
 590 /* We hold the GC lock.  Wait until an in-progress GC has finished.     */
 591 /* Repeatedly RELEASES GC LOCK in order to wait.                        */
 592 /* If wait_for_all is true, then we exit with the GC lock held and no   */
 593 /* collection in progress; otherwise we just wait for the current GC    */
 594 /* to finish.                                                           */
 595 extern GC_bool GC_collection_in_progress(void);
 596 void GC_wait_for_gc_completion(GC_bool wait_for_all)
 597 {
 598     GC_ASSERT(I_HOLD_LOCK());
 599     if (GC_incremental && GC_collection_in_progress()) {
 600         int old_gc_no = GC_gc_no;
 601
 602         /* Make sure that no part of our stack is still on the mark stack, */
 603         /* since it's about to be unmapped.                                */
 604         while (GC_incremental && GC_collection_in_progress()
 605                && (wait_for_all || old_gc_no == GC_gc_no)) {
 606             ENTER_GC();
 607             GC_in_thread_creation = TRUE;
 608             GC_collect_a_little_inner(1);
 609             GC_in_thread_creation = FALSE;
 610             EXIT_GC();
 611             UNLOCK();
 612             sched_yield();
 613             LOCK();
 614         }
 615     }
 616 }
 617
 618 #ifdef HANDLE_FORK
 619 /* Procedures called before and after a fork.  The goal here is to make */
 620 /* it safe to call GC_malloc() in a forked child.  It's unclear that is */
 621 /* attainable, since the single UNIX spec seems to imply that one       */
 622 /* should only call async-signal-safe functions, and we probably can't  */
 623 /* quite guarantee that.  But we give it our best shot.  (That same     */
 624 /* spec also implies that it's not safe to call the system malloc       */
  625 /* between fork() and exec().  Thus we're doing no worse than it.)      */
 626
/* Called before a fork()               */
/* Registered as the "prepare" handler via pthread_atfork() in          */
/* GC_thr_init().  Returns with the allocation lock held and, when      */
/* PARALLEL_MARK or THREAD_LOCAL_ALLOC is defined, the mark lock held   */
/* too; GC_fork_parent_proc() and GC_fork_child_proc() release them.    */
void GC_fork_prepare_proc(void)
{
    /* Acquire all relevant locks, so that after releasing the locks    */
    /* the child will see a consistent state in which monitor           */
    /* invariants hold.  Unfortunately, we can't acquire libc locks     */
    /* we might need, and there seems to be no guarantee that libc      */
    /* must install a suitable fork handler.                            */
    /* Wait for an ongoing GC to finish, since we can't finish it in    */
    /* the (one remaining thread in) the child.                         */
      LOCK();
#     if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
        GC_wait_for_reclaim();
#     endif
      /* wait_for_all: come back with no collection in progress.        */
      GC_wait_for_gc_completion(TRUE);
#     if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
        GC_acquire_mark_lock();
#     endif
}
 646
/* Called in parent after a fork()      */
/* Releases what GC_fork_prepare_proc() acquired, in reverse order:     */
/* first the mark lock (when configured), then the allocation lock.     */
void GC_fork_parent_proc(void)
{
#   if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
      GC_release_mark_lock();
#   endif
    UNLOCK();
}
 655
/* Called in child after a fork()       */
/* Releases the mark lock (when configured), prunes the thread table    */
/* down to the calling thread, turns parallel marking off and finally   */
/* releases the allocation lock taken by GC_fork_prepare_proc().        */
void GC_fork_child_proc(void)
{
    /* Clean up the thread table, so that just our thread is left. */
#   if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
      GC_release_mark_lock();
#   endif
    GC_remove_all_threads_but_me();
#   ifdef PARALLEL_MARK
      /* Turn off parallel marking in the child, since we are probably  */
      /* just going to exec, and we would have to restart mark threads. */
        GC_markers = 1;
        GC_parallel = FALSE;
#   endif /* PARALLEL_MARK */
    UNLOCK();
}
 672 #endif /* HANDLE_FORK */
 673
 674 #if defined(GC_DGUX386_THREADS)
 675 /* Return the number of processors, or i<= 0 if it can't be determined. */
 676 int GC_get_nprocs(void)
 677 {
 678     /* <takis@XFree86.Org> */
 679     int numCpus;
 680     struct dg_sys_info_pm_info pm_sysinfo;
 681     int status =0;
 682
 683     status = dg_sys_info((long int *) &pm_sysinfo,
 684         DG_SYS_INFO_PM_INFO_TYPE, DG_SYS_INFO_PM_CURRENT_VERSION);
 685     if (status < 0)
 686        /* set -1 for error */
 687        numCpus = -1;
 688     else
 689       /* Active CPUs */
 690       numCpus = pm_sysinfo.idle_vp_count;
 691
 692 #  ifdef DEBUG_THREADS
 693     GC_printf("Number of active CPUs in this system: %d\n", numCpus);
 694 #  endif
 695     return(numCpus);
 696 }
 697 #endif /* GC_DGUX386_THREADS */
 698
 699 #if defined(GC_NETBSD_THREADS)
 700 static int get_ncpu(void)
 701 {
 702     int mib[] = {CTL_HW,HW_NCPU};
 703     int res;
 704     size_t len = sizeof(res);
 705
 706     sysctl(mib, sizeof(mib)/sizeof(int), &res, &len, NULL, 0);
 707     return res;
 708 }
 709 #endif  /* GC_NETBSD_THREADS */
 710
/* Only its address matters: GC_thr_init() uses it to find the memory   */
/* mapping that holds the main thread's thread-local variables.         */
# if defined(GC_LINUX_THREADS) && defined(INCLUDE_LINUX_THREAD_DESCR)
__thread int dummy_thread_local;
# endif
 714
 715 /* We hold the allocation lock. */
 716 void GC_thr_init(void)
 717 {
 718 #   ifndef GC_DARWIN_THREADS
 719         int dummy;
 720 #   endif
 721     GC_thread t;
 722
 723     if (GC_thr_initialized) return;
 724     GC_thr_initialized = TRUE;
 725
 726 #   ifdef HANDLE_FORK
 727       /* Prepare for a possible fork.   */
 728         pthread_atfork(GC_fork_prepare_proc, GC_fork_parent_proc,
 729                        GC_fork_child_proc);
 730 #   endif /* HANDLE_FORK */
 731 #   if defined(INCLUDE_LINUX_THREAD_DESCR)
 732       /* Explicitly register the region including the address           */
 733       /* of a thread local variable.  This should include thread        */
 734       /* locals for the main thread, except for those allocated         */
 735       /* in response to dlopen calls.                                   */
 736         {
 737           ptr_t thread_local_addr = (ptr_t)(&dummy_thread_local);
 738           ptr_t main_thread_start, main_thread_end;
 739           if (!GC_enclosing_mapping(thread_local_addr, &main_thread_start,
 740                                     &main_thread_end)) {
 741             ABORT("Failed to find mapping for main thread thread locals");
 742           }
 743           GC_add_roots_inner(main_thread_start, main_thread_end, FALSE);
 744         }
 745 #   endif
 746     /* Add the initial thread, so we can stop it.       */
 747       t = GC_new_thread(pthread_self());
 748 #     ifdef GC_DARWIN_THREADS
 749          t -> stop_info.mach_thread = mach_thread_self();
 750 #     else
 751          t -> stop_info.stack_ptr = (ptr_t)(&dummy);
 752 #     endif
 753       t -> flags = DETACHED | MAIN_THREAD;
 754
 755     GC_stop_init();
 756
 757     /* Set GC_nprocs.  */
 758       {
 759         char * nprocs_string = GETENV("GC_NPROCS");
 760         GC_nprocs = -1;
 761         if (nprocs_string != NULL) GC_nprocs = atoi(nprocs_string);
 762       }
 763       if (GC_nprocs <= 0) {
 764 #       if defined(GC_HPUX_THREADS)
 765           GC_nprocs = pthread_num_processors_np();
 766 #       endif
 767 #       if defined(GC_OSF1_THREADS) || defined(GC_AIX_THREADS) \
 768            || defined(GC_SOLARIS_THREADS) || defined(GC_GNU_THREADS)
 769           GC_nprocs = sysconf(_SC_NPROCESSORS_ONLN);
 770           if (GC_nprocs <= 0) GC_nprocs = 1;
 771 #       endif
 772 #       if defined(GC_IRIX_THREADS)
 773           GC_nprocs = sysconf(_SC_NPROC_ONLN);
 774           if (GC_nprocs <= 0) GC_nprocs = 1;
 775 #       endif
 776 #       if defined(GC_NETBSD_THREADS)
 777           GC_nprocs = get_ncpu();
 778 #       endif
 779 #       if defined(GC_DARWIN_THREADS) || defined(GC_FREEBSD_THREADS)
 780           int ncpus = 1;
 781           size_t len = sizeof(ncpus);
 782           sysctl((int[2]) {CTL_HW, HW_NCPU}, 2, &ncpus, &len, NULL, 0);
 783           GC_nprocs = ncpus;
 784 #       endif
 785 #       if defined(GC_LINUX_THREADS) || defined(GC_DGUX386_THREADS)
 786           GC_nprocs = GC_get_nprocs();
 787 #       endif
 788       }
 789       if (GC_nprocs <= 0) {
 790         WARN("GC_get_nprocs() returned %ld\n", GC_nprocs);
 791         GC_nprocs = 2;
 792 #       ifdef PARALLEL_MARK
 793           GC_markers = 1;
 794 #       endif
 795       } else {
 796 #       ifdef PARALLEL_MARK
 797           {
 798             char * markers_string = GETENV("GC_MARKERS");
 799             if (markers_string != NULL) {
 800               GC_markers = atoi(markers_string);
 801             } else {
 802               GC_markers = GC_nprocs;
 803             }
 804           }
 805 #       endif
 806       }
 807 #   ifdef PARALLEL_MARK
 808       if (GC_print_stats) {
 809           GC_log_printf("Number of processors = %ld, "
 810                  "number of marker threads = %ld\n", GC_nprocs, GC_markers);
 811       }
 812       if (GC_markers == 1) {
 813         GC_parallel = FALSE;
 814         if (GC_print_stats) {
 815             GC_log_printf(
 816                 "Single marker thread, turning off parallel marking\n");
 817         }
 818       } else {
 819         GC_parallel = TRUE;
 820         /* Disable true incremental collection, but generational is OK. */
 821         GC_time_limit = GC_TIME_UNLIMITED;
 822       }
 823       /* If we are using a parallel marker, actually start helper threads.  */
 824         if (GC_parallel) start_mark_threads();
 825 #   endif
 826 }
 827
 828
 829 /* Perform all initializations, including those that    */
 830 /* may require allocation.                              */
 831 /* Called without allocation lock.                      */
 832 /* Must be called before a second thread is created.    */
 833 /* Did we say it's called without the allocation lock?  */
 834 void GC_init_parallel(void)
 835 {
 836     if (parallel_initialized) return;
 837     parallel_initialized = TRUE;
 838
 839     /* GC_init() calls us back, so set flag first.      */
 840     if (!GC_is_initialized) GC_init();
 841     /* Initialize thread local free lists if used.      */
 842 #   if defined(THREAD_LOCAL_ALLOC)
 843       LOCK();
 844       GC_init_thread_local(&(GC_lookup_thread(pthread_self())->tlfs));
 845       UNLOCK();
 846 #   endif
 847 }
 848
 849
 850 #if !defined(GC_DARWIN_THREADS)
 851 int WRAP_FUNC(pthread_sigmask)(int how, const sigset_t *set, sigset_t *oset)
 852 {
 853     sigset_t fudged_set;
 854
 855     INIT_REAL_SYMS();
 856     if (set != NULL && (how == SIG_BLOCK || how == SIG_SETMASK)) {
 857         fudged_set = *set;
 858         sigdelset(&fudged_set, SIG_SUSPEND);
 859         set = &fudged_set;
 860     }
 861     return(REAL_FUNC(pthread_sigmask)(how, set, oset));
 862 }
 863 #endif /* !GC_DARWIN_THREADS */
 864
/* Wrapper for functions that are likely to block for an appreciable    */
/* length of time.                                                      */

/* Bundles a callback with its argument so that GC_do_blocking can hand */
/* both to GC_do_blocking_inner through a single pointer.               */
struct blocking_data {
    void (*fn)(void *);         /* Function to run while the thread is  */
                                /* flagged as blocked.                  */
    void *arg;                  /* Sole argument passed to fn.          */
};
 872
 873 static void GC_do_blocking_inner(ptr_t data, void * context) {
 874     struct blocking_data * d = (struct blocking_data *) data;
 875     GC_thread me;
 876     LOCK();
 877     me = GC_lookup_thread(pthread_self());
 878     GC_ASSERT(!(me -> thread_blocked));
 879 #   ifdef SPARC
 880         me -> stop_info.stack_ptr = GC_save_regs_in_stack();
 881 #   elif !defined(GC_DARWIN_THREADS)
 882         me -> stop_info.stack_ptr = GC_approx_sp();
 883 #   endif
 884 #   ifdef IA64
 885         me -> backing_store_ptr = GC_save_regs_in_stack();
 886 #   endif
 887     me -> thread_blocked = TRUE;
 888     /* Save context here if we want to support precise stack marking */
 889     UNLOCK();
 890     (d -> fn)(d -> arg);
 891     LOCK();   /* This will block if the world is stopped.       */
 892     me -> thread_blocked = FALSE;
 893     UNLOCK();
 894 }
 895
 896 void GC_do_blocking(void (*fn)(void *), void *arg) {
 897     struct blocking_data my_data;
 898
 899     my_data.fn = fn;
 900     my_data.arg = arg;
 901     GC_with_callee_saves_pushed(GC_do_blocking_inner, (ptr_t)(&my_data));
 902 }
 903
/* Hand-off record passed from the pthread_create wrapper to the new    */
/* thread's start routine.                                              */
struct start_info {
    void *(*start_routine)(void *);     /* Client's thread entry point. */
    void *arg;                          /* Argument for start_routine.  */
    word flags;                 /* Initial flags for the child's thread */
                                /* descriptor (DETACHED or 0).          */
    sem_t registered;           /* 1 ==> in our thread table, but       */
                                /* parent hasn't yet noticed.           */
};
 911
 912 int GC_unregister_my_thread(void)
 913 {
 914     GC_thread me;
 915
 916     LOCK();
 917     /* Wait for any GC that may be marking from our stack to    */
 918     /* complete before we remove this thread.                   */
 919     GC_wait_for_gc_completion(FALSE);
 920     me = GC_lookup_thread(pthread_self());
 921 #   if defined(THREAD_LOCAL_ALLOC)
 922       GC_destroy_thread_local(&(me->tlfs));
 923 #   endif
 924     if (me -> flags & DETACHED) {
 925         GC_delete_thread(pthread_self());
 926     } else {
 927         me -> flags |= FINISHED;
 928     }
 929 #   if defined(THREAD_LOCAL_ALLOC)
 930       GC_remove_specific(GC_thread_key);
 931 #   endif
 932     UNLOCK();
 933     return GC_SUCCESS;
 934 }
 935
 936 /* Called at thread exit.                               */
 937 /* Never called for main thread.  That's OK, since it   */
 938 /* results in at most a tiny one-time leak.  And        */
 939 /* linuxthreads doesn't reclaim the main threads        */
 940 /* resources or id anyway.                              */
 941 void GC_thread_exit_proc(void *arg)
 942 {
 943     GC_unregister_my_thread();
 944 }
 945
 946 int WRAP_FUNC(pthread_join)(pthread_t thread, void **retval)
 947 {
 948     int result;
 949     GC_thread thread_gc_id;
 950
 951     INIT_REAL_SYMS();
 952     LOCK();
 953     thread_gc_id = GC_lookup_thread(thread);
 954     /* This is guaranteed to be the intended one, since the thread id   */
 955     /* cant have been recycled by pthreads.                             */
 956     UNLOCK();
 957     result = REAL_FUNC(pthread_join)(thread, retval);
 958 # if defined (GC_FREEBSD_THREADS)
 959     /* On FreeBSD, the wrapped pthread_join() sometimes returns (what
 960        appears to be) a spurious EINTR which caused the test and real code
 961        to gratuitously fail.  Having looked at system pthread library source
 962        code, I see how this return code may be generated.  In one path of
 963        code, pthread_join() just returns the errno setting of the thread
 964        being joined.  This does not match the POSIX specification or the
 965        local man pages thus I have taken the liberty to catch this one
 966        spurious return value properly conditionalized on GC_FREEBSD_THREADS. */
 967     if (result == EINTR) result = 0;
 968 # endif
 969     if (result == 0) {
 970         LOCK();
 971         /* Here the pthread thread id may have been recycled. */
 972         GC_delete_gc_thread(thread_gc_id);
 973         UNLOCK();
 974     }
 975     return result;
 976 }
 977
 978 int
 979 WRAP_FUNC(pthread_detach)(pthread_t thread)
 980 {
 981     int result;
 982     GC_thread thread_gc_id;
 983
 984     INIT_REAL_SYMS();
 985     LOCK();
 986     thread_gc_id = GC_lookup_thread(thread);
 987     UNLOCK();
 988     result = REAL_FUNC(pthread_detach)(thread);
 989     if (result == 0) {
 990       LOCK();
 991       thread_gc_id -> flags |= DETACHED;
 992       /* Here the pthread thread id may have been recycled. */
 993       if (thread_gc_id -> flags & FINISHED) {
 994         GC_delete_gc_thread(thread_gc_id);
 995       }
 996       UNLOCK();
 997     }
 998     return result;
 999 }
1000
1001 GC_bool GC_in_thread_creation = FALSE;  /* Protected by allocation lock. */
1002
1003 GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb,
1004                                       pthread_t my_pthread)
1005 {
1006     GC_thread me;
1007
1008     GC_in_thread_creation = TRUE; /* OK to collect from unknown thread. */
1009     me = GC_new_thread(my_pthread);
1010     GC_in_thread_creation = FALSE;
1011 #   ifdef GC_DARWIN_THREADS
1012       me -> stop_info.mach_thread = mach_thread_self();
1013 #   else
1014       me -> stop_info.stack_ptr = sb -> mem_base;
1015 #   endif
1016     me -> stack_end = sb -> mem_base;
1017 #   ifdef IA64
1018       me -> backing_store_end = sb -> reg_base;
1019 #   endif /* IA64 */
1020     return me;
1021 }
1022
1023 int GC_register_my_thread(struct GC_stack_base *sb)
1024 {
1025     pthread_t my_pthread = pthread_self();
1026     GC_thread me;
1027
1028     LOCK();
1029     me = GC_lookup_thread(my_pthread);
1030     if (0 == me) {
1031         me = GC_register_my_thread_inner(sb, my_pthread);
1032         me -> flags |= DETACHED;
1033           /* Treat as detached, since we do not need to worry about     */
1034           /* pointer results.                                           */
1035         UNLOCK();
1036         return GC_SUCCESS;
1037     } else {
1038         UNLOCK();
1039         return GC_DUPLICATE;
1040     }
1041 }
1042
1043 void * GC_inner_start_routine(struct GC_stack_base *sb, void * arg)
1044 {
1045     struct start_info * si = arg;
1046     void * result;
1047     GC_thread me;
1048     pthread_t my_pthread;
1049     void *(*start)(void *);
1050     void *start_arg;
1051
1052     my_pthread = pthread_self();
1053 #   ifdef DEBUG_THREADS
1054         GC_printf("Starting thread 0x%x\n", (unsigned)my_pthread);
1055         GC_printf("pid = %ld\n", (long) getpid());
1056         GC_printf("sp = 0x%lx\n", (long) &arg);
1057 #   endif
1058     LOCK();
1059     me = GC_register_my_thread_inner(sb, my_pthread);
1060     me -> flags = si -> flags;
1061     UNLOCK();
1062     start = si -> start_routine;
1063 #   ifdef DEBUG_THREADS
1064         GC_printf("start_routine = %p\n", (void *)start);
1065 #   endif
1066     start_arg = si -> arg;
1067     sem_post(&(si -> registered));      /* Last action on si.   */
1068                                         /* OK to deallocate.    */
1069     pthread_cleanup_push(GC_thread_exit_proc, 0);
1070 #   if defined(THREAD_LOCAL_ALLOC)
1071         LOCK();
1072         GC_init_thread_local(&(me->tlfs));
1073         UNLOCK();
1074 #   endif
1075     result = (*start)(start_arg);
1076 #   if DEBUG_THREADS
1077         GC_printf("Finishing thread 0x%x\n", (unsigned)pthread_self());
1078 #   endif
1079     me -> status = result;
1080     pthread_cleanup_pop(1);
1081     /* Cleanup acquires lock, ensuring that we can't exit               */
1082     /* while a collection that thinks we're alive is trying to stop     */
1083     /* us.                                                              */
1084     return(result);
1085 }
1086
1087 void * GC_start_routine(void * arg)
1088 {
1089 #   ifdef INCLUDE_LINUX_THREAD_DESCR
1090       struct GC_stack_base sb;
1091
1092 #     ifdef REDIRECT_MALLOC
1093         /* GC_get_stack_base may call pthread_getattr_np, which can     */
1094         /* unfortunately call realloc, which may allocate from an       */
1095         /* unregistered thread.  This is unpleasant, since it might     */
1096         /* force heap growth.                                           */
1097         GC_disable();
1098 #     endif
1099       if (GC_get_stack_base(&sb) != GC_SUCCESS)
1100         ABORT("Failed to get thread stack base.");
1101 #     ifdef REDIRECT_MALLOC
1102         GC_enable();
1103 #     endif
1104       return GC_inner_start_routine(&sb, arg);
1105 #   else
1106       return GC_call_with_stack_base(GC_inner_start_routine, arg);
1107 #   endif
1108 }
1109
1110 int
1111 WRAP_FUNC(pthread_create)(pthread_t *new_thread,
1112                   const pthread_attr_t *attr,
1113                   void *(*start_routine)(void *), void *arg)
1114 {
1115     int result;
1116     int detachstate;
1117     word my_flags = 0;
1118     struct start_info * si;
1119         /* This is otherwise saved only in an area mmapped by the thread */
1120         /* library, which isn't visible to the collector.                */
1121
1122     /* We resist the temptation to muck with the stack size here,       */
1123     /* even if the default is unreasonably small.  That's the client's  */
1124     /* responsibility.                                                  */
1125
1126     INIT_REAL_SYMS();
1127     LOCK();
1128     si = (struct start_info *)GC_INTERNAL_MALLOC(sizeof(struct start_info),
1129                                                  NORMAL);
1130     UNLOCK();
1131     if (!parallel_initialized) GC_init_parallel();
1132     if (0 == si) return(ENOMEM);
1133     sem_init(&(si -> registered), 0, 0);
1134     si -> start_routine = start_routine;
1135     si -> arg = arg;
1136     LOCK();
1137     if (!GC_thr_initialized) GC_thr_init();
1138 #   ifdef GC_ASSERTIONS
1139       {
1140         size_t stack_size = 0;
1141         if (NULL != attr) {
1142            pthread_attr_getstacksize(attr, &stack_size);
1143         }
1144         if (0 == stack_size) {
1145            pthread_attr_t my_attr;
1146            pthread_attr_init(&my_attr);
1147            pthread_attr_getstacksize(&my_attr, &stack_size);
1148         }
1149         /* On Solaris 10, with default attr initialization,     */
1150         /* stack_size remains 0.  Fudge it.                     */
1151         if (0 == stack_size) {
1152 #           ifndef SOLARIS
1153               WARN("Failed to get stack size for assertion checking\n", 0);
1154 #           endif
1155             stack_size = 1000000;
1156         }
1157 #       ifdef PARALLEL_MARK
1158           GC_ASSERT(stack_size >= (8*HBLKSIZE*sizeof(word)));
1159 #       else
1160           /* FreeBSD-5.3/Alpha: default pthread stack is 64K,   */
1161           /* HBLKSIZE=8192, sizeof(word)=8                      */
1162           GC_ASSERT(stack_size >= 65536);
1163 #       endif
1164         /* Our threads may need to do some work for the GC.     */
1165         /* Ridiculously small threads won't work, and they      */
1166         /* probably wouldn't work anyway.                       */
1167       }
1168 #   endif
1169     if (NULL == attr) {
1170         detachstate = PTHREAD_CREATE_JOINABLE;
1171     } else {
1172         pthread_attr_getdetachstate(attr, &detachstate);
1173     }
1174     if (PTHREAD_CREATE_DETACHED == detachstate) my_flags |= DETACHED;
1175     si -> flags = my_flags;
1176     UNLOCK();
1177 #   ifdef DEBUG_THREADS
1178         GC_printf("About to start new thread from thread 0x%x\n",
1179                   (unsigned)pthread_self());
1180 #   endif
1181     GC_need_to_lock = TRUE;
1182
1183     result = REAL_FUNC(pthread_create)(new_thread, attr, GC_start_routine, si);
1184
1185 #   ifdef DEBUG_THREADS
1186         GC_printf("Started thread 0x%x\n", (unsigned)(*new_thread));
1187 #   endif
1188     /* Wait until child has been added to the thread table.             */
1189     /* This also ensures that we hold onto si until the child is done   */
1190     /* with it.  Thus it doesn't matter whether it is otherwise         */
1191     /* visible to the collector.                                        */
1192     if (0 == result) {
1193         while (0 != sem_wait(&(si -> registered))) {
1194             if (EINTR != errno) ABORT("sem_wait failed");
1195         }
1196     }
1197     sem_destroy(&(si -> registered));
1198     LOCK();
1199     GC_INTERNAL_FREE(si);
1200     UNLOCK();
1201
1202     return(result);
1203 }
1204
/* Burn a few cycles in a way that cannot introduce contention with     */
/* other threads: ten rounds of either an empty asm statement that      */
/* acts as a compiler memory barrier (GCC-compatible compilers other    */
/* than Intel's) or a GC_noop call on a volatile counter.               */
void GC_pause(void)
{
#   if defined(__GNUC__) && !defined(__INTEL_COMPILER)
      int remaining;

      for (remaining = 10; remaining > 0; --remaining) {
        __asm__ __volatile__ (" " : : : "memory");
      }
#   else
      volatile word dummy = 0;
      int round;

      for (round = 0; round < 10; ++round) {
        /* Something that's unlikely to be optimized away. */
        GC_noop(++dummy);
      }
#   endif
}
1223
#define SPIN_MAX 128    /* Maximum number of calls to GC_pause before   */
                        /* giving up; bounds the backoff in             */
                        /* GC_generic_lock and the spin loop in the     */
                        /* spin-lock version of GC_lock.                */

volatile GC_bool GC_collecting = 0;
                        /* A hint that we're in the collector and       */
                        /* holding the allocation lock for an           */
                        /* extended period.  GC_lock skips spinning     */
                        /* while it is set.                             */
1231
1232 #if !defined(USE_SPIN_LOCK) || defined(PARALLEL_MARK)
1233 /* If we don't want to use the below spinlock implementation, either    */
1234 /* because we don't have a GC_test_and_set implementation, or because   */
1235 /* we don't want to risk sleeping, we can still try spinning on         */
1236 /* pthread_mutex_trylock for a while.  This appears to be very          */
1237 /* beneficial in many cases.                                            */
1238 /* I suspect that under high contention this is nearly always better    */
1239 /* than the spin lock.  But it's a bit slower on a uniprocessor.        */
1240 /* Hence we still default to the spin lock.                             */
1241 /* This is also used to acquire the mark lock for the parallel          */
1242 /* marker.                                                              */
1243
1244 /* Here we use a strict exponential backoff scheme.  I don't know       */
1245 /* whether that's better or worse than the above.  We eventually        */
1246 /* yield by calling pthread_mutex_lock(); it never makes sense to       */
1247 /* explicitly sleep.                                                    */
1248
1249 #define LOCK_STATS
1250 #ifdef LOCK_STATS
1251   unsigned long GC_spin_count = 0;
1252   unsigned long GC_block_count = 0;
1253   unsigned long GC_unlocked_count = 0;
1254 #endif
1255
1256 void GC_generic_lock(pthread_mutex_t * lock)
1257 {
1258 #ifndef NO_PTHREAD_TRYLOCK
1259     unsigned pause_length = 1;
1260     unsigned i;
1261
1262     if (0 == pthread_mutex_trylock(lock)) {
1263 #       ifdef LOCK_STATS
1264             ++GC_unlocked_count;
1265 #       endif
1266         return;
1267     }
1268     for (; pause_length <= SPIN_MAX; pause_length <<= 1) {
1269         for (i = 0; i < pause_length; ++i) {
1270             GC_pause();
1271         }
1272         switch(pthread_mutex_trylock(lock)) {
1273             case 0:
1274 #               ifdef LOCK_STATS
1275                     ++GC_spin_count;
1276 #               endif
1277                 return;
1278             case EBUSY:
1279                 break;
1280             default:
1281                 ABORT("Unexpected error from pthread_mutex_trylock");
1282         }
1283     }
1284 #endif /* !NO_PTHREAD_TRYLOCK */
1285 #   ifdef LOCK_STATS
1286         ++GC_block_count;
1287 #   endif
1288     pthread_mutex_lock(lock);
1289 }
1290
1291 #endif /* !USE_SPIN_LOCK || PARALLEL_MARK */
1292
#if defined(USE_SPIN_LOCK)

/* Reasonably fast spin locks.  Basically the same implementation */
/* as STL alloc.h.  This isn't really the right way to do this,   */
/* but until the POSIX scheduling mess gets straightened out ...  */

volatile AO_TS_t GC_allocate_lock = 0;


/* Acquire the allocation lock (spin-lock version).  Three stages:     */
/* one immediate test-and-set; a bounded spin phase whose length is    */
/* adapted from the outcome of earlier calls; and finally an           */
/* unbounded loop that retries after sched_yield() and, once           */
/* SLEEP_THRESHOLD retries have failed, after exponentially growing    */
/* nanosleep() calls.  The spin phase is abandoned at once if          */
/* GC_collecting is set or GC_nprocs is 1.                             */
void GC_lock(void)
{
#   define low_spin_max 30  /* spin cycles if we suspect uniprocessor */
#   define high_spin_max SPIN_MAX /* spin cycles for multiprocessor */
    /* spin_max and last_spins are static hints shared by all callers; */
    /* they are read and written here without holding any lock.        */
    static unsigned spin_max = low_spin_max;
    unsigned my_spin_max;
    static unsigned last_spins = 0;
    unsigned my_last_spins;
    int i;

    /* Fast path: the lock is free. */
    if (AO_test_and_set_acquire(&GC_allocate_lock) == AO_TS_CLEAR) {
        return;
    }
    my_spin_max = spin_max;
    my_last_spins = last_spins;
    for (i = 0; i < my_spin_max; i++) {
        if (GC_collecting || GC_nprocs == 1) goto yield;
        /* For the first my_last_spins/2 rounds only pause, without    */
        /* touching the lock word.                                     */
        if (i < my_last_spins/2) {
            GC_pause();
            continue;
        }
        if (AO_test_and_set_acquire(&GC_allocate_lock) == AO_TS_CLEAR) {
            /*
             * got it!
             * Spinning worked.  Thus we're probably not being scheduled
             * against the other process with which we were contending.
             * Thus it makes sense to spin longer the next time.
             */
            last_spins = i;
            spin_max = high_spin_max;
            return;
        }
    }
    /* We are probably being scheduled against the other process.  Sleep. */
    spin_max = low_spin_max;
yield:
    for (i = 0;; ++i) {
        if (AO_test_and_set_acquire(&GC_allocate_lock) == AO_TS_CLEAR) {
            return;
        }
#       define SLEEP_THRESHOLD 12
                /* Under Linux very short sleeps tend to wait until     */
                /* the current time quantum expires.  On old Linux      */
                /* kernels nanosleep(<= 2ms) just spins under Linux.    */
                /* (Under 2.4, this happens only for real-time          */
                /* processes.)  We want to minimize both behaviors      */
                /* here.                                                */
        if (i < SLEEP_THRESHOLD) {
            sched_yield();
        } else {
            struct timespec ts;

            if (i > 24) i = 24;
                        /* Don't wait for more than about 15msecs, even */
                        /* under extreme contention.                    */
            ts.tv_sec = 0;
            ts.tv_nsec = 1 << i;        /* 2^i nanoseconds.             */
            nanosleep(&ts, 0);
        }
    }
}

#else  /* !USE_SPIN_LOCK */
/* Acquire the allocation lock (pthread mutex version).  Locks          */
/* GC_allocate_ml directly when GC_nprocs is 1, when GC_collecting is   */
/* set, or when NO_PTHREAD_TRYLOCK is defined; otherwise goes through   */
/* GC_generic_lock.                                                     */
void GC_lock(void)
{
#ifndef NO_PTHREAD_TRYLOCK
    if (1 == GC_nprocs || GC_collecting) {
        pthread_mutex_lock(&GC_allocate_ml);
    } else {
        GC_generic_lock(&GC_allocate_ml);
    }
#else  /* NO_PTHREAD_TRYLOCK */
    pthread_mutex_lock(&GC_allocate_ml);
#endif /* NO_PTHREAD_TRYLOCK */
}

#endif /* !USE_SPIN_LOCK */
1379
#if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)

#ifdef GC_ASSERTIONS
  /* Numeric id of the thread that currently holds mark_mutex, or       */
  /* NO_THREAD when it is not held.  Maintained for assertion checking  */
  /* only.                                                              */
  unsigned long GC_mark_lock_holder = NO_THREAD;
#endif

#if 0
  /* Disabled code, kept for reference.                                 */
  /* Ugly workaround for a linux threads bug in the final versions      */
  /* of glibc2.1.  Pthread_mutex_trylock sets the mutex owner           */
  /* field even when it fails to acquire the mutex.  This causes        */
  /* pthread_cond_wait to die.  Remove for glibc2.2.                    */
  /* According to the man page, we should use                           */
  /* PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP, but that isn't actually   */
  /* defined.                                                           */
  static pthread_mutex_t mark_mutex =
        {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, {0, 0}};
#else
  /* The mark lock: taken by GC_acquire_mark_lock and dropped by        */
  /* GC_release_mark_lock.                                              */
  static pthread_mutex_t mark_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif

/* Condition variable that GC_wait_builder waits on and                 */
/* GC_notify_all_builder broadcasts; it is used with mark_mutex.        */
static pthread_cond_t builder_cv = PTHREAD_COND_INITIALIZER;
1401
1402 void GC_acquire_mark_lock(void)
1403 {
1404 /*
1405     if (pthread_mutex_lock(&mark_mutex) != 0) {
1406         ABORT("pthread_mutex_lock failed");
1407     }
1408 */
1409     GC_generic_lock(&mark_mutex);
1410 #   ifdef GC_ASSERTIONS
1411         GC_mark_lock_holder = NUMERIC_THREAD_ID(pthread_self());
1412 #   endif
1413 }
1414
1415 void GC_release_mark_lock(void)
1416 {
1417     GC_ASSERT(GC_mark_lock_holder == NUMERIC_THREAD_ID(pthread_self()));
1418 #   ifdef GC_ASSERTIONS
1419         GC_mark_lock_holder = NO_THREAD;
1420 #   endif
1421     if (pthread_mutex_unlock(&mark_mutex) != 0) {
1422         ABORT("pthread_mutex_unlock failed");
1423     }
1424 }
1425
1426 /* Collector must wait for a freelist builders for 2 reasons:           */
1427 /* 1) Mark bits may still be getting examined without lock.             */
1428 /* 2) Partial free lists referenced only by locals may not be scanned   */
1429 /*    correctly, e.g. if they contain "pointer-free" objects, since the */
1430 /*    free-list link may be ignored.                                    */
1431 void GC_wait_builder(void)
1432 {
1433     GC_ASSERT(GC_mark_lock_holder == NUMERIC_THREAD_ID(pthread_self()));
1434 #   ifdef GC_ASSERTIONS
1435         GC_mark_lock_holder = NO_THREAD;
1436 #   endif
1437     if (pthread_cond_wait(&builder_cv, &mark_mutex) != 0) {
1438         ABORT("pthread_cond_wait failed");
1439     }
1440     GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
1441 #   ifdef GC_ASSERTIONS
1442         GC_mark_lock_holder = NUMERIC_THREAD_ID(pthread_self());
1443 #   endif
1444 }
1445
1446 void GC_wait_for_reclaim(void)
1447 {
1448     GC_acquire_mark_lock();
1449     while (GC_fl_builder_count > 0) {
1450         GC_wait_builder();
1451     }
1452     GC_release_mark_lock();
1453 }
1454
1455 void GC_notify_all_builder(void)
1456 {
1457     GC_ASSERT(GC_mark_lock_holder == NUMERIC_THREAD_ID(pthread_self()));
1458     if (pthread_cond_broadcast(&builder_cv) != 0) {
1459         ABORT("pthread_cond_broadcast failed");
1460     }
1461 }
1462
1463 #endif /* PARALLEL_MARK || THREAD_LOCAL_ALLOC */
1464
1465 #ifdef PARALLEL_MARK
1466
1467 static pthread_cond_t mark_cv = PTHREAD_COND_INITIALIZER;
1468
1469 void GC_wait_marker(void)
1470 {
1471     GC_ASSERT(GC_mark_lock_holder == NUMERIC_THREAD_ID(pthread_self()));
1472 #   ifdef GC_ASSERTIONS
1473         GC_mark_lock_holder = NO_THREAD;
1474 #   endif
1475     if (pthread_cond_wait(&mark_cv, &mark_mutex) != 0) {
1476         ABORT("pthread_cond_wait failed");
1477     }
1478     GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
1479 #   ifdef GC_ASSERTIONS
1480         GC_mark_lock_holder = NUMERIC_THREAD_ID(pthread_self());
1481 #   endif
1482 }
1483
1484 void GC_notify_all_marker(void)
1485 {
1486     if (pthread_cond_broadcast(&mark_cv) != 0) {
1487         ABORT("pthread_cond_broadcast failed");
1488     }
1489 }
1490
1491 #endif /* PARALLEL_MARK */
1492
1493 # endif /* GC_LINUX_THREADS and friends */
1494