X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=src%2Fmm%2Fboehm-gc%2Fwin32_threads.c;h=1ec73b845f31f992168697dab6d83275ca6beea3;hb=c83bff94e40ee0a218a39931af17814d1a42cb5c;hp=5a714f5bde1af0c1a2d47dee91b41ef0da0bbd68;hpb=29c5375943f6b0d6c2e2f77c07c13ae04dae5565;p=cacao.git diff --git a/src/mm/boehm-gc/win32_threads.c b/src/mm/boehm-gc/win32_threads.c index 5a714f5bd..1ec73b845 100755 --- a/src/mm/boehm-gc/win32_threads.c +++ b/src/mm/boehm-gc/win32_threads.c @@ -1,11 +1,45 @@ -#include "config.h" +/* + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * Copyright (c) 1996 by Silicon Graphics. All rights reserved. + * Copyright (c) 1998 by Fergus Henderson. All rights reserved. + * Copyright (c) 2000-2008 by Hewlett-Packard Development Company. + * All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED + * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program + * for any purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is granted, + * provided the above notices are retained, and a notice that the code was + * modified is included with the above copyright notice. + */ + #include "private/gc_priv.h" -#if defined(GC_WIN32_THREADS) +#if defined(GC_WIN32_THREADS) #include -#ifdef CYGWIN32 +#ifdef THREAD_LOCAL_ALLOC +# include "private/thread_local_alloc.h" +#endif /* THREAD_LOCAL_ALLOC */ + +/* Allocation lock declarations. */ +#if !defined(USE_PTHREAD_LOCKS) +# if defined(GC_DLL) + __declspec(dllexport) CRITICAL_SECTION GC_allocate_ml; +# else + CRITICAL_SECTION GC_allocate_ml; +# endif + DWORD GC_lock_holder = NO_THREAD; + /* Thread id for current holder of allocation lock */ +#else + pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER; + unsigned long GC_lock_holder = NO_THREAD; +#endif + +#ifdef GC_PTHREADS # include /* Cygwin-specific forward decls */ @@ -15,136 +49,403 @@ # undef pthread_detach # undef dlopen -# define DEBUG_CYGWIN_THREADS 0 +# ifdef DEBUG_THREADS +# ifdef CYGWIN32 +# define DEBUG_CYGWIN_THREADS 1 +# define DEBUG_WIN32_PTHREADS 0 +# else +# define DEBUG_WIN32_PTHREADS 1 +# define DEBUG_CYGWIN_THREADS 0 +# endif +# else +# define DEBUG_CYGWIN_THREADS 0 +# define DEBUG_WIN32_PTHREADS 0 +# endif + + STATIC void * GC_pthread_start(void * arg); + STATIC void GC_thread_exit_proc(void *arg); + +# include + +#else + +# ifdef DEBUG_THREADS +# define DEBUG_WIN32_THREADS 1 +# else +# define DEBUG_WIN32_THREADS 0 +# endif + +# undef CreateThread +# undef ExitThread +# undef _beginthreadex +# undef _endthreadex +# undef _beginthread +# ifdef DEBUG_THREADS +# define DEBUG_WIN32_THREADS 1 +# else +# define DEBUG_WIN32_THREADS 0 +# endif + +# ifndef MSWINCE +# include /* For _beginthreadex, _endthreadex */ +# endif - void * GC_start_routine(void * arg); - void GC_thread_exit_proc(void *arg); +#endif +/* DllMain-based thread registration is currently incompatible */ +/* with thread-local allocation, pthreads and WinCE. */ +#if defined(GC_DLL) && !defined(MSWINCE) \ + && !defined(THREAD_LOCAL_ALLOC) && !defined(GC_PTHREADS) + static GC_bool GC_win32_dll_threads = FALSE; + /* This code operates in two distinct modes, depending on */ + /* the setting of GC_win32_dll_threads. If */ + /* GC_win32_dll_threads is set, all threads in the process */ + /* are implicitly registered with the GC by DllMain. */ + /* No explicit registration is required, and attempts at */ + /* explicit registration are ignored. This mode is */ + /* very different from the Posix operation of the collector. */ + /* In this mode access to the thread table is lock-free. */ + /* Hence there is a static limit on the number of threads. */ + + /* If GC_win32_dll_threads is FALSE, or the collector is */ + /* built without GC_DLL defined, things operate in a way */ + /* that is very similar to Posix platforms, and new threads */ + /* must be registered with the collector, e.g. by using */ + /* preprocessor-based interception of the thread primitives. */ + /* In this case, we use a real data structure for the thread */ + /* table. Note that there is no equivalent of linker-based */ + /* call interception, since we don't have ELF-like */ + /* facilities. The Windows analog appears to be "API */ + /* hooking", which really seems to be a standard way to */ + /* do minor binary rewriting (?). I'd prefer not to have */ + /* the basic collector rely on such facilities, but an */ + /* optional package that intercepts thread calls this way */ + /* would probably be nice. */ + + /* GC_win32_dll_threads must be set at initialization time, */ + /* i.e. before any collector or thread calls. We make it a */ + /* "dynamic" option only to avoid multiple library versions. */ +#else +# define GC_win32_dll_threads FALSE +# undef MAX_THREADS +# define MAX_THREADS 1 /* dll_thread_table[] is always empty. */ #endif +/* We have two versions of the thread table. Which one */ +/* we us depends on whether or not GC_win32_dll_threads */ +/* is set. Note that before initialization, we don't */ +/* add any entries to either table, even if DllMain is */ +/* called. The main thread will be added on */ +/* initialization. */ + /* The type of the first argument to InterlockedExchange. */ /* Documented to be LONG volatile *, but at least gcc likes */ /* this better. */ typedef LONG * IE_t; -#ifndef MAX_THREADS -# define MAX_THREADS 256 - /* FIXME: */ - /* Things may get quite slow for large numbers of threads, */ - /* since we look them up with sequential search. */ -#endif - GC_bool GC_thr_initialized = FALSE; -DWORD GC_main_thread = 0; +GC_bool GC_need_to_lock = FALSE; + +static GC_bool parallel_initialized = FALSE; + +void GC_init_parallel(void); + +/* GC_use_DllMain() is currently incompatible with pthreads. */ +/* It might be possible to get GC_DLL and DllMain-based thread registration */ +/* to work with Cygwin, but if you try, you are on your own. */ +#if defined(GC_DLL) && !defined(GC_PTHREADS) + /* Turn on GC_win32_dll_threads */ + GC_API void GC_CALL GC_use_DllMain(void) + { +# ifdef THREAD_LOCAL_ALLOC + ABORT("Cannot use thread local allocation with DllMain-based " + "thread registration."); + /* Thread-local allocation really wants to lock at thread */ + /* entry and exit. */ +# else + GC_ASSERT(!parallel_initialized); + GC_win32_dll_threads = TRUE; + GC_init_parallel(); +# endif + } +#else + GC_API void GC_CALL GC_use_DllMain(void) + { + ABORT("GC not configured as DLL"); + } +#endif -struct GC_thread_Rep { - LONG in_use; /* Updated without lock. */ - /* We assert that unused */ - /* entries have invalid ids of */ - /* zero and zero stack fields. */ +STATIC DWORD GC_main_thread = 0; + +#define ADDR_LIMIT ((ptr_t)(word)-1) + +struct GC_Thread_Rep { + union { + AO_t tm_in_use; /* Updated without lock. */ + /* We assert that unused */ + /* entries have invalid ids of */ + /* zero and zero stack fields. */ + /* Used only with GC_win32_dll_threads. */ + struct GC_Thread_Rep * tm_next; + /* Hash table link without */ + /* GC_win32_dll_threads. */ + /* More recently allocated threads */ + /* with a given pthread id come */ + /* first. (All but the first are */ + /* guaranteed to be dead, but we may */ + /* not yet have registered the join.) */ + } table_management; +# define in_use table_management.tm_in_use +# define next table_management.tm_next DWORD id; HANDLE handle; ptr_t stack_base; /* The cold end of the stack. */ /* 0 ==> entry not valid. */ /* !in_use ==> stack_base == 0 */ + ptr_t last_stack_min; /* Last known minimum (hottest) address */ + /* in stack or ADDR_LIMIT if unset */ +# ifdef IA64 + ptr_t backing_store_end; + ptr_t backing_store_ptr; +# endif + GC_bool suspended; -# ifdef CYGWIN32 +# ifdef GC_PTHREADS void *status; /* hold exit value until join in case it's a pointer */ pthread_t pthread_id; short flags; /* Protected by GC lock. */ # define FINISHED 1 /* Thread has exited. */ # define DETACHED 2 /* Thread is intended to be detached. */ +# define KNOWN_FINISHED(t) (((t) -> flags) & FINISHED) +# else +# define KNOWN_FINISHED(t) 0 +# endif +# ifdef THREAD_LOCAL_ALLOC + struct thread_local_freelists tlfs; # endif }; -typedef volatile struct GC_thread_Rep * GC_thread; +typedef struct GC_Thread_Rep * GC_thread; +typedef volatile struct GC_Thread_Rep * GC_vthread; /* - * We generally assume that volatile ==> memory ordering, at least among - * volatiles. + * We assumed that volatile ==> memory ordering, at least among + * volatiles. This code should consistently use atomic_ops. */ volatile GC_bool GC_please_stop = FALSE; -volatile struct GC_thread_Rep thread_table[MAX_THREADS]; +/* + * We track thread attachments while the world is supposed to be stopped. + * Unfortunately, we can't stop them from starting, since blocking in + * DllMain seems to cause the world to deadlock. Thus we have to recover + * If we notice this in the middle of marking. + */ + +AO_t GC_attached_thread = FALSE; +/* Return TRUE if an thread was attached since we last asked or */ +/* since GC_attached_thread was explicitly reset. */ +GC_bool GC_started_thread_while_stopped(void) +{ + AO_t result; + + if (GC_win32_dll_threads) { + AO_nop_full(); /* Prior heap reads need to complete earlier. */ + result = AO_load(&GC_attached_thread); + if (result) { + AO_store(&GC_attached_thread, FALSE); + } + return ((GC_bool)result); + } else { + return FALSE; + } +} -volatile LONG GC_max_thread_index = 0; /* Largest index in thread_table */ - /* that was ever used. */ +/* Thread table used if GC_win32_dll_threads is set. */ +/* This is a fixed size array. */ +/* Since we use runtime conditionals, both versions */ +/* are always defined. */ +# ifndef MAX_THREADS +# define MAX_THREADS 512 +# endif + /* Things may get quite slow for large numbers of threads, */ + /* since we look them up with sequential search. */ + + volatile struct GC_Thread_Rep dll_thread_table[MAX_THREADS]; + + volatile LONG GC_max_thread_index = 0; + /* Largest index in dll_thread_table */ + /* that was ever used. */ + +/* And now the version used if GC_win32_dll_threads is not set. */ +/* This is a chained hash table, with much of the code borrowed */ +/* From the Posix implementation. */ +#ifndef THREAD_TABLE_SZ +# define THREAD_TABLE_SZ 256 /* Must be power of 2 */ +#endif + STATIC GC_thread GC_threads[THREAD_TABLE_SZ]; + + /* It may not be safe to allocate when we register the first thread. */ + /* Thus we allocated one statically. */ + static struct GC_Thread_Rep first_thread; + static GC_bool first_thread_used = FALSE; + +/* Add a thread to GC_threads. We assume it wasn't already there. */ +/* Caller holds allocation lock. */ +/* Unlike the pthreads version, the id field is set by the caller. */ +STATIC GC_thread GC_new_thread(DWORD id) +{ + word hv = ((word)id) % THREAD_TABLE_SZ; + GC_thread result; + + GC_ASSERT(I_HOLD_LOCK()); + if (!first_thread_used) { + result = &first_thread; + first_thread_used = TRUE; + } else { + GC_ASSERT(!GC_win32_dll_threads); + result = (struct GC_Thread_Rep *) + GC_INTERNAL_MALLOC(sizeof(struct GC_Thread_Rep), NORMAL); + /* result can be NULL */ + if (result == 0) return(0); + } + /* result -> id = id; Done by caller. */ + result -> next = GC_threads[hv]; + GC_threads[hv] = result; +# ifdef GC_PTHREADS + GC_ASSERT(result -> flags == 0 /* && result -> thread_blocked == 0 */); +# endif + return(result); +} extern LONG WINAPI GC_write_fault_handler(struct _EXCEPTION_POINTERS *exc_info); +#if defined(GWW_VDB) && defined(MPROTECT_VDB) + extern GC_bool GC_gww_dirty_init(void); + /* Defined in os_dep.c. Returns TRUE if GetWriteWatch is available. */ + /* may be called repeatedly. */ +#endif + +GC_bool GC_in_thread_creation = FALSE; /* Protected by allocation lock. */ + /* * This may be called from DllMain, and hence operates under unusual - * constraints. + * constraints. In particular, it must be lock-free if GC_win32_dll_threads + * is set. Always called from the thread being added. + * If GC_win32_dll_threads is not set, we already hold the allocation lock, + * except possibly during single-threaded start-up code. */ -static GC_thread GC_new_thread(void) { - int i; - /* It appears to be unsafe to acquire a lock here, since this */ - /* code is apparently not preeemptible on some systems. */ - /* (This is based on complaints, not on Microsoft's official */ - /* documentation, which says this should perform "only simple */ - /* initialization tasks".) */ - /* Hence we make do with nonblocking synchronization. */ - - /* The following should be a noop according to the win32 */ +static GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb, + DWORD thread_id) +{ + GC_vthread me; + + /* The following should be a no-op according to the win32 */ /* documentation. There is empirical evidence that it */ /* isn't. - HB */ # if defined(MPROTECT_VDB) - if (GC_incremental) SetUnhandledExceptionFilter(GC_write_fault_handler); +# if defined(GWW_VDB) + if (GC_incremental && !GC_gww_dirty_init()) + SetUnhandledExceptionFilter(GC_write_fault_handler); +# else + if (GC_incremental) SetUnhandledExceptionFilter(GC_write_fault_handler); +# endif # endif + + if (GC_win32_dll_threads) { + int i; + /* It appears to be unsafe to acquire a lock here, since this */ + /* code is apparently not preemptible on some systems. */ + /* (This is based on complaints, not on Microsoft's official */ + /* documentation, which says this should perform "only simple */ + /* initialization tasks".) */ + /* Hence we make do with nonblocking synchronization. */ + /* It has been claimed that DllMain is really only executed with */ + /* a particular system lock held, and thus careful use of locking */ + /* around code that doesn't call back into the system libraries */ + /* might be OK. But this hasn't been tested across all win32 */ + /* variants. */ /* cast away volatile qualifier */ - for (i = 0; InterlockedExchange((IE_t)&thread_table[i].in_use,1) != 0; i++) { - /* Compare-and-swap would make this cleaner, but that's not */ - /* supported before Windows 98 and NT 4.0. In Windows 2000, */ - /* InterlockedExchange is supposed to be replaced by */ - /* InterlockedExchangePointer, but that's not really what I */ - /* want here. */ - if (i == MAX_THREADS - 1) - ABORT("too many threads"); - } - /* Update GC_max_thread_index if necessary. The following is safe, */ - /* and unlike CompareExchange-based solutions seems to work on all */ - /* Windows95 and later platforms. */ - /* Unfortunately, GC_max_thread_index may be temporarily out of */ - /* bounds, so readers have to compensate. */ - while (i > GC_max_thread_index) { - InterlockedIncrement((IE_t)&GC_max_thread_index); - } - if (GC_max_thread_index >= MAX_THREADS) { - /* We overshot due to simultaneous increments. */ - /* Setting it to MAX_THREADS-1 is always safe. */ - GC_max_thread_index = MAX_THREADS - 1; + for (i = 0; InterlockedExchange((void*)&dll_thread_table[i].in_use,1) != 0; + i++) { + /* Compare-and-swap would make this cleaner, but that's not */ + /* supported before Windows 98 and NT 4.0. In Windows 2000, */ + /* InterlockedExchange is supposed to be replaced by */ + /* InterlockedExchangePointer, but that's not really what I */ + /* want here. */ + /* FIXME: We should eventually declare Win95 dead and use AO_ */ + /* primitives here. */ + if (i == MAX_THREADS - 1) + ABORT("too many threads"); + } + /* Update GC_max_thread_index if necessary. The following is safe, */ + /* and unlike CompareExchange-based solutions seems to work on all */ + /* Windows95 and later platforms. */ + /* Unfortunately, GC_max_thread_index may be temporarily out of */ + /* bounds, so readers have to compensate. */ + while (i > GC_max_thread_index) { + InterlockedIncrement((IE_t)&GC_max_thread_index); + } + if (GC_max_thread_index >= MAX_THREADS) { + /* We overshot due to simultaneous increments. */ + /* Setting it to MAX_THREADS-1 is always safe. */ + GC_max_thread_index = MAX_THREADS - 1; + } + me = dll_thread_table + i; + } else /* Not using DllMain */ { + GC_ASSERT(I_HOLD_LOCK()); + GC_in_thread_creation = TRUE; /* OK to collect from unknown thread. */ + me = GC_new_thread(thread_id); + GC_in_thread_creation = FALSE; + if (me == 0) + ABORT("Failed to allocate memory for thread registering."); } - -# ifdef CYGWIN32 - thread_table[i].pthread_id = pthread_self(); +# ifdef GC_PTHREADS + /* me can be NULL -> segfault */ + me -> pthread_id = pthread_self(); # endif + if (!DuplicateHandle(GetCurrentProcess(), - GetCurrentThread(), - GetCurrentProcess(), - (HANDLE*)&thread_table[i].handle, - 0, - 0, - DUPLICATE_SAME_ACCESS)) { - DWORD last_error = GetLastError(); - GC_printf1("Last error code: %lx\n", last_error); + GetCurrentThread(), + GetCurrentProcess(), + (HANDLE*)&(me -> handle), + 0, + 0, + DUPLICATE_SAME_ACCESS)) { + GC_err_printf("Last error code: %d\n", (int)GetLastError()); ABORT("DuplicateHandle failed"); } - thread_table[i].stack_base = GC_get_stack_base(); + me -> last_stack_min = ADDR_LIMIT; + me -> stack_base = sb -> mem_base; +# ifdef IA64 + me -> backing_store_end = sb -> reg_base; +# endif /* Up until this point, GC_push_all_stacks considers this thread */ /* invalid. */ - if (thread_table[i].stack_base == NULL) - ABORT("Failed to find stack base in GC_new_thread"); /* Up until this point, this entry is viewed as reserved but invalid */ /* by GC_delete_thread. */ - thread_table[i].id = GetCurrentThreadId(); - /* If this thread is being created while we are trying to stop */ - /* the world, wait here. Hopefully this can't happen on any */ - /* systems that don't allow us to block here. */ - while (GC_please_stop) Sleep(20); - return thread_table + i; + me -> id = thread_id; +# if defined(THREAD_LOCAL_ALLOC) + GC_init_thread_local((GC_tlfs)(&(me->tlfs))); +# endif + if (me -> stack_base == NULL) + ABORT("Bad stack base in GC_register_my_thread_inner"); + if (GC_win32_dll_threads) { + if (GC_please_stop) { + AO_store(&GC_attached_thread, TRUE); + AO_nop_full(); /* Later updates must become visible after this. */ + } + /* We'd like to wait here, but can't, since waiting in DllMain */ + /* provokes deadlocks. */ + /* Thus we force marking to be restarted instead. */ + } else { + GC_ASSERT(!GC_please_stop); + /* Otherwise both we and the thread stopping code would be */ + /* holding the allocation lock. */ + } + return (GC_thread)(me); } /* @@ -154,7 +455,7 @@ static GC_thread GC_new_thread(void) { #ifdef __GNUC__ __inline__ #endif -LONG GC_get_max_thread_index() +STATIC LONG GC_get_max_thread_index(void) { LONG my_max = GC_max_thread_index; @@ -162,160 +463,427 @@ LONG GC_get_max_thread_index() return my_max; } -/* This is intended to be lock-free, though that */ -/* assumes that the CloseHandle becomes visible before the */ -/* in_use assignment. */ -static void GC_delete_gc_thread(GC_thread thr) +/* Return the GC_thread corresponding to a thread id. May be called */ +/* without a lock, but should be called in contexts in which the */ +/* requested thread cannot be asynchronously deleted, e.g. from the */ +/* thread itself. */ +/* This version assumes that either GC_win32_dll_threads is set, or */ +/* we hold the allocator lock. */ +/* Also used (for assertion checking only) from thread_local_alloc.c. */ +GC_thread GC_lookup_thread_inner(DWORD thread_id) { + if (GC_win32_dll_threads) { + int i; + LONG my_max = GC_get_max_thread_index(); + for (i = 0; + i <= my_max && + (!AO_load_acquire(&(dll_thread_table[i].in_use)) + || dll_thread_table[i].id != thread_id); + /* Must still be in_use, since nobody else can store our thread_id. */ + i++) {} + if (i > my_max) { + return 0; + } else { + return (GC_thread)(dll_thread_table + i); + } + } else { + word hv = ((word)thread_id) % THREAD_TABLE_SZ; + register GC_thread p = GC_threads[hv]; + + GC_ASSERT(I_HOLD_LOCK()); + while (p != 0 && p -> id != thread_id) p = p -> next; + return(p); + } +} + +/* Make sure thread descriptor t is not protected by the VDB */ +/* implementation. */ +/* Used to prevent write faults when the world is (partially) stopped, */ +/* since it may have been stopped with a system lock held, and that */ +/* lock may be required for fault handling. */ +# if defined(MPROTECT_VDB) && !defined(MSWINCE) +# define UNPROTECT(t) \ + if (GC_dirty_maintained && !GC_win32_dll_threads && \ + t != &first_thread) { \ + GC_ASSERT(SMALL_OBJ(GC_size(t))); \ + GC_remove_protection(HBLKPTR(t), 1, FALSE); \ + } +# else +# define UNPROTECT(p) +# endif + +/* If a thread has been joined, but we have not yet */ +/* been notified, then there may be more than one thread */ +/* in the table with the same win32 id. */ +/* This is OK, but we need a way to delete a specific one. */ +/* Assumes we hold the allocation lock unless */ +/* GC_win32_dll_threads is set. */ +/* If GC_win32_dll_threads is set it should be called from the */ +/* thread being deleted. */ +STATIC void GC_delete_gc_thread(GC_vthread gc_id) { - CloseHandle(thr->handle); - /* cast away volatile qualifier */ - thr->stack_base = 0; - thr->id = 0; + CloseHandle(gc_id->handle); + if (GC_win32_dll_threads) { + /* This is intended to be lock-free. */ + /* It is either called synchronously from the thread being deleted, */ + /* or by the joining thread. */ + /* In this branch asynchronous changes to *gc_id are possible. */ + gc_id -> stack_base = 0; + gc_id -> id = 0; # ifdef CYGWIN32 - thr->pthread_id = 0; + gc_id -> pthread_id = 0; # endif /* CYGWIN32 */ - thr->in_use = FALSE; +# ifdef GC_WIN32_PTHREADS + gc_id -> pthread_id.p = NULL; +# endif /* GC_WIN32_PTHREADS */ + AO_store_release(&(gc_id->in_use), FALSE); + } else { + /* Cast away volatile qualifier, since we have lock. */ + GC_thread gc_nvid = (GC_thread)gc_id; + DWORD id = gc_nvid -> id; + word hv = ((word)id) % THREAD_TABLE_SZ; + register GC_thread p = GC_threads[hv]; + register GC_thread prev = 0; + + GC_ASSERT(I_HOLD_LOCK()); + while (p != gc_nvid) { + prev = p; + p = p -> next; + } + if (prev == 0) { + GC_threads[hv] = p -> next; + } else { + prev -> next = p -> next; + } + GC_INTERNAL_FREE(p); + } } -static void GC_delete_thread(DWORD thread_id) { - int i; - LONG my_max = GC_get_max_thread_index(); +/* Delete a thread from GC_threads. We assume it is there. */ +/* (The code intentionally traps if it wasn't.) */ +/* Assumes we hold the allocation lock unless */ +/* GC_win32_dll_threads is set. */ +/* If GC_win32_dll_threads is set it should be called from the */ +/* thread being deleted. */ +STATIC void GC_delete_thread(DWORD id) +{ + if (GC_win32_dll_threads) { + GC_thread t = GC_lookup_thread_inner(id); - for (i = 0; - i <= my_max && - (!thread_table[i].in_use || thread_table[i].id != thread_id); - /* Must still be in_use, since nobody else can store our thread_id. */ - i++) {} - if (i > my_max) { - WARN("Removing nonexistent thread %ld\n", (GC_word)thread_id); + if (0 == t) { + WARN("Removing nonexistent thread %ld\n", (long)id); + } else { + GC_delete_gc_thread(t); + } } else { - GC_delete_gc_thread(thread_table+i); + word hv = ((word)id) % THREAD_TABLE_SZ; + register GC_thread p = GC_threads[hv]; + register GC_thread prev = 0; + + GC_ASSERT(I_HOLD_LOCK()); + while (p -> id != id) { + prev = p; + p = p -> next; + } + CloseHandle(p->handle); + if (prev == 0) { + GC_threads[hv] = p -> next; + } else { + prev -> next = p -> next; + } + GC_INTERNAL_FREE(p); } } +GC_API void GC_CALL GC_allow_register_threads(void) +{ + /* Check GC is initialized and the current thread is registered. */ + GC_ASSERT(GC_lookup_thread_inner(GetCurrentThreadId()) != 0); + +# if defined(GC_DLL) && !defined(PARALLEL_MARK) && !defined(THREAD_LOCAL_ALLOC) + /* GC_init_parallel() is not called from GC_init_inner(). */ + parallel_initialized = TRUE; +# endif + GC_need_to_lock = TRUE; /* We are multi-threaded now. */ +} + +GC_API int GC_CALL GC_register_my_thread(struct GC_stack_base *sb) { + DWORD t = GetCurrentThreadId(); + + if (GC_need_to_lock == FALSE) + ABORT("Threads explicit registering is not previously enabled"); + + /* We lock here, since we want to wait for an ongoing GC. */ + LOCK(); + if (0 == GC_lookup_thread_inner(t)) { + GC_register_my_thread_inner(sb, t); + UNLOCK(); + return GC_SUCCESS; + } else { + UNLOCK(); + return GC_DUPLICATE; + } +} -#ifdef CYGWIN32 +GC_API int GC_CALL GC_unregister_my_thread(void) +{ + DWORD t = GetCurrentThreadId(); + + if (GC_win32_dll_threads) { +# if defined(THREAD_LOCAL_ALLOC) + /* Can't happen: see GC_use_DllMain(). */ + GC_ASSERT(FALSE); +# endif + /* FIXME: Should we just ignore this? */ + GC_delete_thread(t); + } else { + LOCK(); +# if defined(THREAD_LOCAL_ALLOC) + { + GC_thread me = GC_lookup_thread_inner(t); + GC_destroy_thread_local(&(me->tlfs)); + } +# endif + GC_delete_thread(t); + UNLOCK(); + } + return GC_SUCCESS; +} + + +#ifdef GC_PTHREADS + +/* A quick-and-dirty cache of the mapping between pthread_t */ +/* and win32 thread id. */ +#define PTHREAD_MAP_SIZE 512 +DWORD GC_pthread_map_cache[PTHREAD_MAP_SIZE]; +#define HASH(pthread_id) ((NUMERIC_THREAD_ID(pthread_id) >> 5) % PTHREAD_MAP_SIZE) + /* It appears pthread_t is really a pointer type ... */ +#define SET_PTHREAD_MAP_CACHE(pthread_id, win32_id) \ + (GC_pthread_map_cache[HASH(pthread_id)] = (win32_id)) +#define GET_PTHREAD_MAP_CACHE(pthread_id) \ + GC_pthread_map_cache[HASH(pthread_id)] /* Return a GC_thread corresponding to a given pthread_t. */ /* Returns 0 if it's not there. */ /* We assume that this is only called for pthread ids that */ -/* have not yet terminated or are still joinable. */ -static GC_thread GC_lookup_thread(pthread_t id) +/* have not yet terminated or are still joinable, and */ +/* cannot be concurrently terminated. */ +/* Assumes we do NOT hold the allocation lock. */ +static GC_thread GC_lookup_pthread(pthread_t id) { - int i; - LONG my_max = GC_get_max_thread_index(); + if (GC_win32_dll_threads) { + int i; + LONG my_max = GC_get_max_thread_index(); - for (i = 0; - i <= my_max && - (!thread_table[i].in_use || thread_table[i].pthread_id != id - || !thread_table[i].in_use); + for (i = 0; + i <= my_max && + (!AO_load_acquire(&(dll_thread_table[i].in_use)) + || THREAD_EQUAL(dll_thread_table[i].pthread_id, id)); /* Must still be in_use, since nobody else can store our thread_id. */ i++); - if (i > my_max) return 0; - return thread_table + i; + if (i > my_max) return 0; + return (GC_thread)(dll_thread_table + i); + } else { + /* We first try the cache. If that fails, we use a very slow */ + /* approach. */ + int hv_guess = GET_PTHREAD_MAP_CACHE(id) % THREAD_TABLE_SZ; + int hv; + GC_thread p; + + LOCK(); + for (p = GC_threads[hv_guess]; 0 != p; p = p -> next) { + if (THREAD_EQUAL(p -> pthread_id, id)) + goto foundit; + } + for (hv = 0; hv < THREAD_TABLE_SZ; ++hv) { + for (p = GC_threads[hv]; 0 != p; p = p -> next) { + if (THREAD_EQUAL(p -> pthread_id, id)) + goto foundit; + } + } + p = 0; + foundit: + UNLOCK(); + return p; + } } -#endif /* CYGWIN32 */ +#endif /* GC_PTHREADS */ -void GC_push_thread_structures GC_PROTO((void)) +void GC_push_thread_structures(void) { + GC_ASSERT(I_HOLD_LOCK()); + if (GC_win32_dll_threads) { /* Unlike the other threads implementations, the thread table here */ /* contains no pointers to the collectable heap. Thus we have */ /* no private structures we need to preserve. */ -# ifdef CYGWIN32 - { int i; /* pthreads may keep a pointer in the thread exit value */ - LONG my_max = GC_get_max_thread_index(); - - for (i = 0; i <= my_max; i++) - if (thread_table[i].in_use) - GC_push_all((ptr_t)&(thread_table[i].status), - (ptr_t)(&(thread_table[i].status)+1)); +# ifdef GC_PTHREADS + { int i; /* pthreads may keep a pointer in the thread exit value */ + LONG my_max = GC_get_max_thread_index(); + + for (i = 0; i <= my_max; i++) + if (dll_thread_table[i].in_use) + GC_push_all((ptr_t)&(dll_thread_table[i].status), + (ptr_t)(&(dll_thread_table[i].status)+1)); + } +# endif + } else { + GC_push_all((ptr_t)(GC_threads), (ptr_t)(GC_threads)+sizeof(GC_threads)); } +# if defined(THREAD_LOCAL_ALLOC) + GC_push_all((ptr_t)(&GC_thread_key), + (ptr_t)(&GC_thread_key)+sizeof(&GC_thread_key)); + /* Just in case we ever use our own TLS implementation. */ +# endif +} + +#if defined(MPROTECT_VDB) && !defined(MSWINCE) + extern volatile AO_TS_t GC_fault_handler_lock; /* from os_dep.c */ +#endif + +/* Suspend the given thread, if it's still active. */ +STATIC void GC_suspend(GC_thread t) +{ +# ifdef MSWINCE + /* SuspendThread will fail if thread is running kernel code */ + while (SuspendThread(t -> handle) == (DWORD)-1) + Sleep(10); +# else + /* Apparently the Windows 95 GetOpenFileName call creates */ + /* a thread that does not properly get cleaned up, and */ + /* SuspendThread on its descriptor may provoke a crash. */ + /* This reduces the probability of that event, though it still */ + /* appears there's a race here. */ + DWORD exitCode; + + UNPROTECT(t); + if (GetExitCodeThread(t -> handle, &exitCode) && + exitCode != STILL_ACTIVE) { +# ifdef GC_PTHREADS + t -> stack_base = 0; /* prevent stack from being pushed */ +# else + /* this breaks pthread_join on Cygwin, which is guaranteed to */ + /* only see user pthreads */ + GC_ASSERT(GC_win32_dll_threads); + GC_delete_gc_thread(t); +# endif + return; + } +# if defined(MPROTECT_VDB) && !defined(MSWINCE) + /* Acquire the spin lock we use to update dirty bits. */ + /* Threads shouldn't get stopped holding it. But we may */ + /* acquire and release it in the UNPROTECT call. */ + while (AO_test_and_set_acquire(&GC_fault_handler_lock) == AO_TS_SET) {} +# endif + + if (SuspendThread(t -> handle) == (DWORD)-1) + ABORT("SuspendThread failed"); # endif + t -> suspended = TRUE; +# if defined(MPROTECT_VDB) && !defined(MSWINCE) + AO_CLEAR(&GC_fault_handler_lock); +# endif } /* Defined in misc.c */ -extern CRITICAL_SECTION GC_write_cs; +#ifndef CYGWIN32 + extern CRITICAL_SECTION GC_write_cs; +#endif -void GC_stop_world() +void GC_stop_world(void) { DWORD thread_id = GetCurrentThreadId(); int i; + int my_max; if (!GC_thr_initialized) ABORT("GC_stop_world() called before GC_thr_init()"); + GC_ASSERT(I_HOLD_LOCK()); + + /* This code is the same as in pthread_stop_world.c */ +# ifdef PARALLEL_MARK + if (GC_parallel) { + GC_acquire_mark_lock(); + GC_ASSERT(GC_fl_builder_count == 0); + /* We should have previously waited for it to become zero. */ + } +# endif /* PARALLEL_MARK */ GC_please_stop = TRUE; # ifndef CYGWIN32 EnterCriticalSection(&GC_write_cs); -# endif /* !CYGWIN32 */ - for (i = 0; i <= GC_get_max_thread_index(); i++) - if (thread_table[i].stack_base != 0 - && thread_table[i].id != thread_id) { -# ifdef MSWINCE - /* SuspendThread will fail if thread is running kernel code */ - while (SuspendThread(thread_table[i].handle) == (DWORD)-1) - Sleep(10); -# else - /* Apparently the Windows 95 GetOpenFileName call creates */ - /* a thread that does not properly get cleaned up, and */ - /* SuspendThread on its descriptor may provoke a crash. */ - /* This reduces the probability of that event, though it still */ - /* appears there's a race here. */ - DWORD exitCode; - if (GetExitCodeThread(thread_table[i].handle,&exitCode) && - exitCode != STILL_ACTIVE) { - thread_table[i].stack_base = 0; /* prevent stack from being pushed */ -# ifndef CYGWIN32 - /* this breaks pthread_join on Cygwin, which is guaranteed to */ - /* only see user pthreads */ - thread_table[i].in_use = FALSE; - CloseHandle(thread_table[i].handle); -# endif - continue; - } - if (SuspendThread(thread_table[i].handle) == (DWORD)-1) - ABORT("SuspendThread failed"); -# endif - thread_table[i].suspended = TRUE; +# endif + if (GC_win32_dll_threads) { + /* Any threads being created during this loop will end up setting */ + /* GC_attached_thread when they start. This will force marking to */ + /* restart. */ + /* This is not ideal, but hopefully correct. */ + GC_attached_thread = FALSE; + my_max = (int)GC_get_max_thread_index(); + for (i = 0; i <= my_max; i++) { + GC_vthread t = dll_thread_table + i; + if (t -> stack_base != 0 + && t -> id != thread_id) { + GC_suspend((GC_thread)t); + } } + } else { + GC_thread t; + int i; + + for (i = 0; i < THREAD_TABLE_SZ; i++) { + for (t = GC_threads[i]; t != 0; t = t -> next) { + if (t -> stack_base != 0 + && !KNOWN_FINISHED(t) + && t -> id != thread_id) { + GC_suspend(t); + } + } + } + } # ifndef CYGWIN32 LeaveCriticalSection(&GC_write_cs); -# endif /* !CYGWIN32 */ +# endif +# ifdef PARALLEL_MARK + if (GC_parallel) + GC_release_mark_lock(); +# endif } -void GC_start_world() +void GC_start_world(void) { DWORD thread_id = GetCurrentThreadId(); int i; - LONG my_max = GC_get_max_thread_index(); - for (i = 0; i <= my_max; i++) - if (thread_table[i].stack_base != 0 && thread_table[i].suspended - && thread_table[i].id != thread_id) { - if (ResumeThread(thread_table[i].handle) == (DWORD)-1) - ABORT("ResumeThread failed"); - thread_table[i].suspended = FALSE; + GC_ASSERT(I_HOLD_LOCK()); + if (GC_win32_dll_threads) { + LONG my_max = GC_get_max_thread_index(); + for (i = 0; i <= my_max; i++) { + GC_thread t = (GC_thread)(dll_thread_table + i); + if (t -> stack_base != 0 && t -> suspended + && t -> id != thread_id) { + if (ResumeThread(t -> handle) == (DWORD)-1) + ABORT("ResumeThread failed"); + t -> suspended = FALSE; + } } - GC_please_stop = FALSE; -} - -# ifdef _MSC_VER -# pragma warning(disable:4715) -# endif -ptr_t GC_current_stackbottom() -{ - DWORD thread_id = GetCurrentThreadId(); - int i; - LONG my_max = GC_get_max_thread_index(); + } else { + GC_thread t; + int i; - for (i = 0; i <= my_max; i++) - if (thread_table[i].stack_base && thread_table[i].id == thread_id) - return thread_table[i].stack_base; - ABORT("no thread table entry for current thread"); + for (i = 0; i < THREAD_TABLE_SZ; i++) { + for (t = GC_threads[i]; t != 0; t = t -> next) { + if (t -> stack_base != 0 && t -> suspended + && t -> id != thread_id) { + if (ResumeThread(t -> handle) == (DWORD)-1) + ABORT("ResumeThread failed"); + UNPROTECT(t); + t -> suspended = FALSE; + } + } + } + } + GC_please_stop = FALSE; } -# ifdef _MSC_VER -# pragma warning(default:4715) -# endif # ifdef MSWINCE /* The VirtualQuery calls below won't work properly on WinCE, but */ @@ -324,40 +892,61 @@ ptr_t GC_current_stackbottom() # define GC_get_stack_min(s) \ ((ptr_t)(((DWORD)(s) - 1) & 0xFFFF0000)) # else + + /* A cache holding the results of the last VirtualQuery call. */ + /* Protected by the allocation lock. */ + static ptr_t last_address = 0; + static MEMORY_BASIC_INFORMATION last_info; + + /* Probe stack memory region (starting at "s") to find out its */ + /* lowest address (i.e. stack top). */ + /* S must be a mapped address inside the region, NOT the first */ + /* unmapped address. */ static ptr_t GC_get_stack_min(ptr_t s) { ptr_t bottom; - MEMORY_BASIC_INFORMATION info; - VirtualQuery(s, &info, sizeof(info)); + + GC_ASSERT(I_HOLD_LOCK()); + if (s != last_address) { + VirtualQuery(s, &last_info, sizeof(last_info)); + last_address = s; + } do { - bottom = info.BaseAddress; - VirtualQuery(bottom - 1, &info, sizeof(info)); - } while ((info.Protect & PAGE_READWRITE) - && !(info.Protect & PAGE_GUARD)); + bottom = last_info.BaseAddress; + VirtualQuery(bottom - 1, &last_info, sizeof(last_info)); + last_address = bottom - 1; + } while ((last_info.Protect & PAGE_READWRITE) + && !(last_info.Protect & PAGE_GUARD)); return(bottom); } + + /* Return true if the page at s has protections appropriate */ + /* for a stack page. */ + static GC_bool GC_may_be_in_stack(ptr_t s) + { + GC_ASSERT(I_HOLD_LOCK()); + if (s != last_address) { + VirtualQuery(s, &last_info, sizeof(last_info)); + last_address = s; + } + return (last_info.Protect & PAGE_READWRITE) + && !(last_info.Protect & PAGE_GUARD); + } # endif -void GC_push_all_stacks() +STATIC void GC_push_stack_for(GC_thread thread) { - DWORD thread_id = GetCurrentThreadId(); - GC_bool found_me = FALSE; - int i; - int dummy; - ptr_t sp, stack_min; - GC_thread thread; - LONG my_max = GC_get_max_thread_index(); - - for (i = 0; i <= my_max; i++) { - thread = thread_table + i; - if (thread -> in_use && thread -> stack_base) { - if (thread -> id == thread_id) { + int dummy; + ptr_t sp, stack_min; + DWORD me = GetCurrentThreadId(); + + if (thread -> stack_base) { + if (thread -> id == me) { sp = (ptr_t) &dummy; - found_me = TRUE; } else { CONTEXT context; context.ContextFlags = CONTEXT_INTEGER|CONTEXT_CONTROL; - if (!GetThreadContext(thread_table[i].handle, &context)) + if (!GetThreadContext(thread -> handle, &context)) ABORT("GetThreadContext failed"); /* Push all registers that might point into the heap. Frame */ @@ -369,6 +958,10 @@ void GC_push_all_stacks() # if defined(I386) PUSH4(Edi,Esi,Ebx,Edx), PUSH2(Ecx,Eax), PUSH1(Ebp); sp = (ptr_t)context.Esp; +# elif defined(X86_64) + PUSH4(Rax,Rcx,Rdx,Rbx); PUSH2(Rbp, Rsi); PUSH1(Rdi); + PUSH4(R8, R9, R10, R11); PUSH4(R12, R13, R14, R15); + sp = (ptr_t)context.Rsp; # elif defined(ARM32) PUSH4(R0,R1,R2,R3),PUSH4(R4,R5,R6,R7),PUSH4(R8,R9,R10,R11),PUSH1(R12); sp = (ptr_t)context.Sp; @@ -397,109 +990,593 @@ void GC_push_all_stacks() # else # error "architecture is not supported" # endif + } /* ! current thread */ + + /* Set stack_min to the lowest address in the thread stack, */ + /* or to an address in the thread stack no larger than sp, */ + /* taking advantage of the old value to avoid slow traversals */ + /* of large stacks. */ + if (thread -> last_stack_min == ADDR_LIMIT) { + stack_min = GC_get_stack_min(thread -> stack_base); + UNPROTECT(thread); + thread -> last_stack_min = stack_min; + } else { + if (sp < thread -> stack_base && sp >= thread -> last_stack_min) { + stack_min = sp; + } else { +# ifdef MSWINCE + stack_min = GC_get_stack_min(thread -> stack_base); +# else + if (GC_may_be_in_stack(thread -> last_stack_min)) { + stack_min = GC_get_stack_min(thread -> last_stack_min); + } else { + /* Stack shrunk? Is this possible? */ + stack_min = GC_get_stack_min(thread -> stack_base); + } +# endif + UNPROTECT(thread); + thread -> last_stack_min = stack_min; + } } - - stack_min = GC_get_stack_min(thread->stack_base); - - if (sp >= stack_min && sp < thread->stack_base) + GC_ASSERT(stack_min == GC_get_stack_min(thread -> stack_base) + || (sp >= stack_min && stack_min < thread -> stack_base + && stack_min > GC_get_stack_min(thread -> stack_base))); + + if (sp >= stack_min && sp < thread->stack_base) { +# ifdef DEBUG_THREADS + GC_printf("Pushing stack for 0x%x from sp %p to %p from 0x%x\n", + (int)thread -> id, sp, thread -> stack_base, (int)me); +# endif GC_push_all_stack(sp, thread->stack_base); - else { - WARN("Thread stack pointer 0x%lx out of range, pushing everything\n", - (unsigned long)sp); + } else { + /* If not current thread then it is possible for sp to point to */ + /* the guarded (untouched yet) page just below the current */ + /* stack_min of the thread. */ + if (thread -> id == me || sp >= thread->stack_base + || sp + GC_page_size < stack_min) + WARN("Thread stack pointer %p out of range, pushing everything\n", + sp); +# ifdef DEBUG_THREADS + GC_printf("Pushing stack for 0x%x from (min) %p to %p from 0x%x\n", + (int)thread -> id, stack_min, + thread -> stack_base, (int)me); +# endif GC_push_all_stack(stack_min, thread->stack_base); } + } /* thread looks live */ +} + +void GC_push_all_stacks(void) +{ + DWORD me = GetCurrentThreadId(); + GC_bool found_me = FALSE; +# ifndef SMALL_CONFIG + unsigned nthreads = 0; +# endif + + if (GC_win32_dll_threads) { + int i; + LONG my_max = GC_get_max_thread_index(); + + for (i = 0; i <= my_max; i++) { + GC_thread t = (GC_thread)(dll_thread_table + i); + if (t -> in_use) { +# ifndef SMALL_CONFIG + ++nthreads; +# endif + GC_push_stack_for(t); + if (t -> id == me) found_me = TRUE; + } + } + } else { + GC_thread t; + int i; + + for (i = 0; i < THREAD_TABLE_SZ; i++) { + for (t = GC_threads[i]; t != 0; t = t -> next) { +# ifndef SMALL_CONFIG + ++nthreads; +# endif + if (!KNOWN_FINISHED(t)) GC_push_stack_for(t); + if (t -> id == me) found_me = TRUE; + } } } - if (!found_me) ABORT("Collecting from unknown thread."); +# ifndef SMALL_CONFIG + if (GC_print_stats == VERBOSE) { + GC_log_printf("Pushed %d thread stacks%s\n", nthreads, + GC_win32_dll_threads ? " based on DllMain thread tracking" : ""); + } +# endif + if (!found_me && !GC_in_thread_creation) + ABORT("Collecting from unknown thread."); } -void GC_get_next_stack(char *start, char **lo, char **hi) +#ifdef PARALLEL_MARK + +# ifndef MAX_MARKERS +# define MAX_MARKERS 16 +# endif + + extern long GC_markers; /* Number of mark threads we would */ + /* like to have. Includes the */ + /* initiating thread. */ + + STATIC ptr_t marker_sp[MAX_MARKERS - 1]; /* The cold end of the stack */ + /* for markers. */ +# ifdef IA64 + STATIC ptr_t marker_bsp[MAX_MARKERS - 1]; +# endif + + STATIC ptr_t marker_last_stack_min[MAX_MARKERS - 1]; + /* Last known minimum (hottest) address */ + /* in stack (or ADDR_LIMIT if unset) */ + /* for markers. */ + +#endif + +/* Find stack with the lowest address which overlaps the */ +/* interval [start, limit). */ +/* Return stack bounds in *lo and *hi. If no such stack */ +/* is found, both *hi and *lo will be set to an address */ +/* higher than limit. */ +void GC_get_next_stack(char *start, char *limit, + char **lo, char **hi) { int i; -# define ADDR_LIMIT (char *)(-1L) - char * current_min = ADDR_LIMIT; - LONG my_max = GC_get_max_thread_index(); + char * current_min = ADDR_LIMIT; /* Least in-range stack base */ + ptr_t *plast_stack_min = NULL; /* Address of last_stack_min */ + /* field for thread corresponding */ + /* to current_min. */ + GC_thread thread = NULL; /* Either NULL or points to the */ + /* thread's hash table entry */ + /* containing *plast_stack_min. */ + + /* First set current_min, ignoring limit. */ + if (GC_win32_dll_threads) { + LONG my_max = GC_get_max_thread_index(); - for (i = 0; i <= my_max; i++) { - char * s = (char *)thread_table[i].stack_base; + for (i = 0; i <= my_max; i++) { + ptr_t s = (ptr_t)(dll_thread_table[i].stack_base); - if (0 != s && s > start && s < current_min) { + if (s > start && s < current_min) { + /* Update address of last_stack_min. */ + plast_stack_min = (ptr_t * /* no volatile */) + &dll_thread_table[i].last_stack_min; current_min = s; - } - } + } + } + } else { + for (i = 0; i < THREAD_TABLE_SZ; i++) { + GC_thread t; + + for (t = GC_threads[i]; t != 0; t = t -> next) { + ptr_t s = t -> stack_base; + + if (s > start && s < current_min) { + /* Update address of last_stack_min. */ + plast_stack_min = &t -> last_stack_min; + thread = t; /* Remember current thread to unprotect. */ + current_min = s; + } + } + } +# ifdef PARALLEL_MARK + for (i = 0; i < GC_markers - 1; ++i) { + ptr_t s = marker_sp[i]; +# ifdef IA64 + /* FIXME: not implemented */ +# endif + if (s > start && s < current_min) { + GC_ASSERT(marker_last_stack_min[i] != NULL); + plast_stack_min = &marker_last_stack_min[i]; + current_min = s; + thread = NULL; /* Not a thread's hash table entry. */ + } + } +# endif + } + *hi = current_min; if (current_min == ADDR_LIMIT) { *lo = ADDR_LIMIT; return; } - *lo = GC_get_stack_min(current_min); - if (*lo < start) *lo = start; + + GC_ASSERT(current_min > start); + +# ifndef MSWINCE + if (current_min > limit && !GC_may_be_in_stack(limit)) { + /* Skip the rest since the memory region at limit address is */ + /* not a stack (so the lowest address of the found stack would */ + /* be above the limit value anyway). */ + *lo = ADDR_LIMIT; + return; + } +# endif + + /* Get the minimum address of the found stack by probing its memory */ + /* region starting from the last known minimum (if set). */ + if (*plast_stack_min == ADDR_LIMIT +# ifndef MSWINCE + || !GC_may_be_in_stack(*plast_stack_min) +# endif + ) { + /* Unsafe to start from last value. */ + *lo = GC_get_stack_min(current_min); + } else { + /* Use last value value to optimize search for min address */ + *lo = GC_get_stack_min(*plast_stack_min); + } + + /* Remember current stack_min value. */ + if (thread != NULL) { + UNPROTECT(thread); + } + *plast_stack_min = *lo; } -#if !defined(CYGWIN32) +#ifdef PARALLEL_MARK -#if !defined(MSWINCE) && defined(GC_DLL) + /* GC_mark_thread() is the same as in pthread_support.c */ +#ifdef GC_PTHREADS + STATIC void * GC_mark_thread(void * id) +#else + STATIC unsigned __stdcall GC_mark_thread(void * id) +#endif +{ + word my_mark_no = 0; -/* We register threads from DllMain */ + marker_sp[(word)id] = GC_approx_sp(); +# ifdef IA64 + marker_bsp[(word)id] = GC_save_regs_in_stack(); +# endif -GC_API HANDLE WINAPI GC_CreateThread( - LPSECURITY_ATTRIBUTES lpThreadAttributes, - DWORD dwStackSize, LPTHREAD_START_ROUTINE lpStartAddress, - LPVOID lpParameter, DWORD dwCreationFlags, LPDWORD lpThreadId ) + if ((word)id == (word)-1) return 0; /* to make compiler happy */ + + for (;; ++my_mark_no) { + if (my_mark_no - GC_mark_no > (word)2) { + /* resynchronize if we get far off, e.g. because GC_mark_no */ + /* wrapped. */ + my_mark_no = GC_mark_no; + } +# ifdef DEBUG_THREADS + GC_printf("Starting mark helper for mark number %lu\n", + (unsigned long)my_mark_no); +# endif + GC_help_marker(my_mark_no); + } +} + +#ifdef GC_ASSERTIONS + unsigned long GC_mark_lock_holder = NO_THREAD; +#endif + +/* GC_mark_threads[] is unused here unlike that in pthread_support.c */ + +#ifdef GC_PTHREADS + +/* start_mark_threads() is the same as in pthread_support.c except for: */ +/* - GC_markers value is adjusted already; */ +/* - thread stack is assumed to be large enough; and */ +/* - statistics about the number of marker threads is already printed. */ + +STATIC void start_mark_threads(void) { - return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress, - lpParameter, dwCreationFlags, lpThreadId); + unsigned i; + pthread_attr_t attr; + pthread_t new_thread; + + if (0 != pthread_attr_init(&attr)) ABORT("pthread_attr_init failed"); + + if (0 != pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) + ABORT("pthread_attr_setdetachstate failed"); + + for (i = 0; i < GC_markers - 1; ++i) { + marker_last_stack_min[i] = ADDR_LIMIT; + if (0 != pthread_create(&new_thread, &attr, + GC_mark_thread, (void *)(word)i)) { + WARN("Marker thread creation failed, errno = %ld.\n", + /* (word) */ errno); + } + } + pthread_attr_destroy(&attr); } -#else /* defined(MSWINCE) || !defined(GC_DLL)) */ +STATIC pthread_mutex_t mark_mutex = PTHREAD_MUTEX_INITIALIZER; -/* We have no DllMain to take care of new threads. Thus we */ -/* must properly intercept thread creation. */ +STATIC pthread_cond_t builder_cv = PTHREAD_COND_INITIALIZER; -typedef struct { - LPTHREAD_START_ROUTINE start; - LPVOID param; -} thread_args; +/* GC_acquire/release_mark_lock(), GC_wait_builder/marker(), */ +/* GC_wait_for_reclaim(), GC_notify_all_builder/marker() are the same */ +/* as in pthread_support.c except that GC_generic_lock() is not used. */ + +#ifdef LOCK_STATS + AO_t GC_block_count = 0; +#endif -static DWORD WINAPI thread_start(LPVOID arg); +void GC_acquire_mark_lock(void) +{ + if (pthread_mutex_lock(&mark_mutex) != 0) { + ABORT("pthread_mutex_lock failed"); + } +# ifdef LOCK_STATS + (void)AO_fetch_and_add1(&GC_block_count); +# endif + /* GC_generic_lock(&mark_mutex); */ +# ifdef GC_ASSERTIONS + GC_mark_lock_holder = NUMERIC_THREAD_ID(pthread_self()); +# endif +} -GC_API HANDLE WINAPI GC_CreateThread( - LPSECURITY_ATTRIBUTES lpThreadAttributes, - DWORD dwStackSize, LPTHREAD_START_ROUTINE lpStartAddress, - LPVOID lpParameter, DWORD dwCreationFlags, LPDWORD lpThreadId ) +void GC_release_mark_lock(void) { - HANDLE thread_h = NULL; + GC_ASSERT(GC_mark_lock_holder == NUMERIC_THREAD_ID(pthread_self())); +# ifdef GC_ASSERTIONS + GC_mark_lock_holder = NO_THREAD; +# endif + if (pthread_mutex_unlock(&mark_mutex) != 0) { + ABORT("pthread_mutex_unlock failed"); + } +} - thread_args *args; +/* Collector must wait for a freelist builders for 2 reasons: */ +/* 1) Mark bits may still be getting examined without lock. */ +/* 2) Partial free lists referenced only by locals may not be scanned */ +/* correctly, e.g. if they contain "pointer-free" objects, since the */ +/* free-list link may be ignored. */ +/* STATIC */ void GC_wait_builder(void) +{ + GC_ASSERT(GC_mark_lock_holder == NUMERIC_THREAD_ID(pthread_self())); +# ifdef GC_ASSERTIONS + GC_mark_lock_holder = NO_THREAD; +# endif + if (pthread_cond_wait(&builder_cv, &mark_mutex) != 0) { + ABORT("pthread_cond_wait failed"); + } + GC_ASSERT(GC_mark_lock_holder == NO_THREAD); +# ifdef GC_ASSERTIONS + GC_mark_lock_holder = NUMERIC_THREAD_ID(pthread_self()); +# endif +} - if (!GC_is_initialized) GC_init(); - /* make sure GC is initialized (i.e. main thread is attached) */ +void GC_wait_for_reclaim(void) +{ + GC_acquire_mark_lock(); + while (GC_fl_builder_count > 0) { + GC_wait_builder(); + } + GC_release_mark_lock(); +} + +void GC_notify_all_builder(void) +{ + GC_ASSERT(GC_mark_lock_holder == NUMERIC_THREAD_ID(pthread_self())); + if (pthread_cond_broadcast(&builder_cv) != 0) { + ABORT("pthread_cond_broadcast failed"); + } +} + +STATIC pthread_cond_t mark_cv = PTHREAD_COND_INITIALIZER; + +void GC_wait_marker(void) +{ + GC_ASSERT(GC_mark_lock_holder == NUMERIC_THREAD_ID(pthread_self())); +# ifdef GC_ASSERTIONS + GC_mark_lock_holder = NO_THREAD; +# endif + if (pthread_cond_wait(&mark_cv, &mark_mutex) != 0) { + ABORT("pthread_cond_wait failed"); + } + GC_ASSERT(GC_mark_lock_holder == NO_THREAD); +# ifdef GC_ASSERTIONS + GC_mark_lock_holder = NUMERIC_THREAD_ID(pthread_self()); +# endif +} + +void GC_notify_all_marker(void) +{ + if (pthread_cond_broadcast(&mark_cv) != 0) { + ABORT("pthread_cond_broadcast failed"); + } +} + +#else /* ! GC_PTHREADS */ + +STATIC void start_mark_threads(void) +{ + int i; + GC_uintptr_t handle; + unsigned thread_id; + + for (i = 0; i < GC_markers - 1; ++i) { + marker_last_stack_min[i] = ADDR_LIMIT; + handle = _beginthreadex(NULL /* security_attr */, 0 /* stack_size */, + GC_mark_thread, (void *)(word)i, 0 /* flags */, + &thread_id); + if (!handle || handle == (GC_uintptr_t)-1L) + WARN("Marker thread creation failed\n", 0); + else { /* We may detach the thread (if handle is of HANDLE type) */ + /* CloseHandle((HANDLE)handle); */ + } + } +} + +STATIC HANDLE mark_mutex_event = (HANDLE)0; /* Event with auto-reset. */ +volatile AO_t GC_mark_mutex_waitcnt = 0; /* Number of waiters + 1; */ + /* 0 - unlocked. */ + +STATIC HANDLE builder_cv = (HANDLE)0; /* Event with manual reset */ + +/* mark_mutex_event, builder_cv, mark_cv are initialized in GC_thr_init(). */ + +/* #define LOCK_STATS */ +#ifdef LOCK_STATS + AO_t GC_block_count = 0; + AO_t GC_unlocked_count = 0; +#endif + +void GC_acquire_mark_lock(void) +{ + if (AO_fetch_and_add1_acquire(&GC_mark_mutex_waitcnt) != 0) { +# ifdef LOCK_STATS + (void)AO_fetch_and_add1(&GC_block_count); +# endif + if (WaitForSingleObject(mark_mutex_event, INFINITE) == WAIT_FAILED) + ABORT("WaitForSingleObject() failed"); + } +# ifdef LOCK_STATS + else { + (void)AO_fetch_and_add1(&GC_unlocked_count); + } +# endif + + GC_ASSERT(GC_mark_lock_holder == NO_THREAD); +# ifdef GC_ASSERTIONS + GC_mark_lock_holder = (unsigned long)GetCurrentThreadId(); +# endif +} + +void GC_release_mark_lock(void) +{ + GC_ASSERT(GC_mark_lock_holder == (unsigned long)GetCurrentThreadId()); +# ifdef GC_ASSERTIONS + GC_mark_lock_holder = NO_THREAD; +# endif + GC_ASSERT(AO_load(&GC_mark_mutex_waitcnt) != 0); + if (AO_fetch_and_sub1_release(&GC_mark_mutex_waitcnt) > 1 && + SetEvent(mark_mutex_event) == FALSE) + ABORT("SetEvent() failed"); +} + +/* In GC_wait_for_reclaim/GC_notify_all_builder() we emulate POSIX */ +/* cond_wait/cond_broadcast() primitives with WinAPI Event object */ +/* (working in "manual reset" mode). This works here because */ +/* GC_notify_all_builder() is always called holding lock on */ +/* mark_mutex and the checked condition (GC_fl_builder_count == 0) */ +/* is the only one for which broadcasting on builder_cv is performed. */ + +void GC_wait_for_reclaim(void) +{ + GC_ASSERT(builder_cv != 0); + for (;;) { + GC_acquire_mark_lock(); + if (GC_fl_builder_count == 0) + break; + if (ResetEvent(builder_cv) == FALSE) + ABORT("ResetEvent() failed"); + GC_release_mark_lock(); + if (WaitForSingleObject(builder_cv, INFINITE) == WAIT_FAILED) + ABORT("WaitForSingleObject() failed"); + } + GC_release_mark_lock(); +} + +void GC_notify_all_builder(void) +{ + GC_ASSERT(GC_mark_lock_holder == (unsigned long)GetCurrentThreadId()); + GC_ASSERT(builder_cv != 0); + GC_ASSERT(GC_fl_builder_count == 0); + if (SetEvent(builder_cv) == FALSE) + ABORT("SetEvent() failed"); +} + +/* For GC_wait_marker/GC_notify_all_marker() the above technique does */ +/* not work because they are used with different checked conditions in */ +/* different places (and, in addition, notifying is done after leaving */ +/* critical section) and this could result in a signal loosing between */ +/* checking for a particular condition and calling WaitForSingleObject. */ +/* So, we use PulseEvent() and NT SignalObjectAndWait() (which */ +/* atomically sets mutex event to signaled state and starts waiting on */ +/* condvar). A special case here is GC_mark_mutex_waitcnt == 1 (i.e. */ +/* nobody waits for mark lock at this moment) - we don't change it */ +/* (otherwise we may loose a signal sent between decrementing */ +/* GC_mark_mutex_waitcnt and calling WaitForSingleObject()). */ + +STATIC HANDLE mark_cv = (HANDLE)0; /* Event with manual reset */ + +typedef DWORD (WINAPI * SignalObjectAndWait_type)( + HANDLE, HANDLE, DWORD, BOOL); +STATIC SignalObjectAndWait_type signalObjectAndWait_func = 0; + +void GC_wait_marker(void) +{ + /* Here we assume that GC_wait_marker() is always called */ + /* from a while(check_cond) loop. */ + AO_t waitcnt; + GC_ASSERT(mark_cv != 0); + GC_ASSERT(signalObjectAndWait_func != 0); + + /* We inline GC_release_mark_lock() to have atomic */ + /* unlock-and-wait action here. */ + GC_ASSERT(GC_mark_lock_holder == (unsigned long)GetCurrentThreadId()); +# ifdef GC_ASSERTIONS + GC_mark_lock_holder = NO_THREAD; +# endif - args = GC_malloc_uncollectable(sizeof(thread_args)); - /* Handed off to and deallocated by child thread. */ - if (0 == args) { - SetLastError(ERROR_NOT_ENOUGH_MEMORY); - return NULL; + if ((waitcnt = AO_load(&GC_mark_mutex_waitcnt)) > 1) { + (void)AO_fetch_and_sub1_release(&GC_mark_mutex_waitcnt); + } else { + GC_ASSERT(AO_load(&GC_mark_mutex_waitcnt) != 0); } - /* set up thread arguments */ - args -> start = lpStartAddress; - args -> param = lpParameter; + /* The state of mark_cv is non-signaled here. */ + if ((*signalObjectAndWait_func)(mark_mutex_event /* hObjectToSignal */, + mark_cv /* hObjectToWaitOn */, + INFINITE /* timeout */, + FALSE /* isAlertable */) == WAIT_FAILED) + ABORT("SignalObjectAndWait() failed"); + /* The state of mark_cv is non-signaled here again. */ - thread_h = CreateThread(lpThreadAttributes, - dwStackSize, thread_start, - args, dwCreationFlags, - lpThreadId); + if (waitcnt > 1) { + GC_acquire_mark_lock(); + } else { + GC_ASSERT(GC_mark_mutex_waitcnt != 0); + /* Acquire mark lock */ + if (WaitForSingleObject(mark_mutex_event, INFINITE) == WAIT_FAILED) + ABORT("WaitForSingleObject() failed"); + GC_ASSERT(GC_mark_lock_holder == NO_THREAD); +# ifdef GC_ASSERTIONS + GC_mark_lock_holder = (unsigned long)GetCurrentThreadId(); +# endif + } +} - return thread_h; +void GC_notify_all_marker(void) +{ + GC_ASSERT(mark_cv != 0); + if (PulseEvent(mark_cv) == FALSE) + ABORT("PulseEvent() failed"); } -static DWORD WINAPI thread_start(LPVOID arg) +/* Defined in os_dep.c */ +extern GC_bool GC_wnt; + +#endif /* ! GC_PTHREADS */ + +#endif /* PARALLEL_MARK */ + +#ifndef GC_PTHREADS + +/* We have no DllMain to take care of new threads. Thus we */ +/* must properly intercept thread creation. */ + +typedef struct { + LPTHREAD_START_ROUTINE start; + LPVOID param; +} thread_args; + +STATIC void * GC_CALLBACK GC_win32_start_inner(struct GC_stack_base *sb, + void *arg) { - DWORD ret = 0; + void * ret; thread_args *args = (thread_args *)arg; - GC_new_thread(); + GC_register_my_thread(sb); /* This waits for an in-progress GC. */ + +# if DEBUG_WIN32_THREADS + GC_printf("thread 0x%x starting...\n", (unsigned)GetCurrentThreadId()); +# endif /* Clear the thread entry even if we exit with an exception. */ /* This is probably pointless, since an uncaught exception is */ @@ -507,21 +1584,128 @@ static DWORD WINAPI thread_start(LPVOID arg) #ifndef __GNUC__ __try { #endif /* __GNUC__ */ - ret = args->start (args->param); + ret = (void *)(word)args->start (args->param); #ifndef __GNUC__ } __finally { #endif /* __GNUC__ */ + GC_unregister_my_thread(); GC_free(args); - GC_delete_thread(GetCurrentThreadId()); #ifndef __GNUC__ } #endif /* __GNUC__ */ +# if DEBUG_WIN32_THREADS + GC_printf("thread 0x%x returned from start routine.\n", + (unsigned)GetCurrentThreadId()); +# endif return ret; } -#endif /* !defined(MSWINCE) && !(defined(__MINGW32__) && !defined(_DLL)) */ -#endif /* !CYGWIN32 */ +DWORD WINAPI GC_win32_start(LPVOID arg) +{ + return (DWORD)(word)GC_call_with_stack_base(GC_win32_start_inner, arg); +} + +GC_API HANDLE WINAPI GC_CreateThread( + LPSECURITY_ATTRIBUTES lpThreadAttributes, + DWORD dwStackSize, LPTHREAD_START_ROUTINE lpStartAddress, + LPVOID lpParameter, DWORD dwCreationFlags, LPDWORD lpThreadId ) +{ + HANDLE thread_h; + + thread_args *args; + + if (!parallel_initialized) GC_init_parallel(); + /* make sure GC is initialized (i.e. main thread is attached, + tls initialized) */ + +# if DEBUG_WIN32_THREADS + GC_printf("About to create a thread from 0x%x\n", + (unsigned)GetCurrentThreadId()); +# endif + if (GC_win32_dll_threads) { + return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress, + lpParameter, dwCreationFlags, lpThreadId); + } else { + args = GC_malloc_uncollectable(sizeof(thread_args)); + /* Handed off to and deallocated by child thread. */ + if (0 == args) { + SetLastError(ERROR_NOT_ENOUGH_MEMORY); + return NULL; + } + + /* set up thread arguments */ + args -> start = lpStartAddress; + args -> param = lpParameter; + + GC_need_to_lock = TRUE; + thread_h = CreateThread(lpThreadAttributes, + dwStackSize, GC_win32_start, + args, dwCreationFlags, + lpThreadId); + if( thread_h == 0 ) GC_free( args ); + return thread_h; + } +} + +GC_API void WINAPI GC_ExitThread(DWORD dwExitCode) +{ + GC_unregister_my_thread(); + ExitThread(dwExitCode); +} + +#ifndef MSWINCE + +GC_API GC_uintptr_t GC_CALL GC_beginthreadex( + void *security, unsigned stack_size, + unsigned ( __stdcall *start_address )( void * ), + void *arglist, unsigned initflag, unsigned *thrdaddr) +{ + GC_uintptr_t thread_h; + + thread_args *args; + + if (!parallel_initialized) GC_init_parallel(); + /* make sure GC is initialized (i.e. main thread is attached, + tls initialized) */ +# if DEBUG_WIN32_THREADS + GC_printf("About to create a thread from 0x%x\n", + (unsigned)GetCurrentThreadId()); +# endif + + if (GC_win32_dll_threads) { + return _beginthreadex(security, stack_size, start_address, + arglist, initflag, thrdaddr); + } else { + args = GC_malloc_uncollectable(sizeof(thread_args)); + /* Handed off to and deallocated by child thread. */ + if (0 == args) { + SetLastError(ERROR_NOT_ENOUGH_MEMORY); + return (GC_uintptr_t)(-1L); + } + + /* set up thread arguments */ + args -> start = (LPTHREAD_START_ROUTINE)start_address; + args -> param = arglist; + + GC_need_to_lock = TRUE; + thread_h = _beginthreadex(security, stack_size, + (unsigned (__stdcall *) (void *))GC_win32_start, + args, initflag, thrdaddr); + if( thread_h == 0 ) GC_free( args ); + return thread_h; + } +} + +GC_API void GC_CALL GC_endthreadex(unsigned retval) +{ + GC_unregister_my_thread(); + _endthreadex(retval); +} + +#endif /* !MSWINCE */ + +#endif /* !GC_PTHREADS */ #ifdef MSWINCE @@ -576,16 +1760,109 @@ DWORD WINAPI main_thread_start(LPVOID arg) # else /* !MSWINCE */ /* Called by GC_init() - we hold the allocation lock. */ -void GC_thr_init() { +void GC_thr_init(void) { + struct GC_stack_base sb; +# ifdef GC_ASSERTIONS + int sb_result; +# endif + + GC_ASSERT(I_HOLD_LOCK()); if (GC_thr_initialized) return; GC_main_thread = GetCurrentThreadId(); GC_thr_initialized = TRUE; /* Add the initial thread, so we can stop it. */ - GC_new_thread(); +# ifdef GC_ASSERTIONS + sb_result = +# endif + GC_get_stack_base(&sb); + GC_ASSERT(sb_result == GC_SUCCESS); + +# ifdef PARALLEL_MARK + /* Set GC_markers. */ + { + char * markers_string = GETENV("GC_MARKERS"); + if (markers_string != NULL) { + GC_markers = atoi(markers_string); + if (GC_markers > MAX_MARKERS) { + WARN("Limiting number of mark threads\n", 0); + GC_markers = MAX_MARKERS; + } + } else { +# ifdef _WIN64 + DWORD_PTR procMask = 0; + DWORD_PTR sysMask; +# else + DWORD procMask = 0; + DWORD sysMask; +# endif + int ncpu = 0; + if (GetProcessAffinityMask(GetCurrentProcess(), + (void *)&procMask, (void *)&sysMask) + && procMask) { + do { + ncpu++; + } while ((procMask &= procMask - 1) != 0); + } + GC_markers = ncpu; + if (GC_markers >= MAX_MARKERS) + GC_markers = MAX_MARKERS; /* silently limit GC_markers value */ + } + } + + /* Set GC_parallel. */ + { +# ifndef GC_PTHREADS + HMODULE hK32; + /* SignalObjectAndWait() API call works only under NT. */ +# endif + if (GC_markers <= 1 || GC_win32_dll_threads +# ifndef GC_PTHREADS + || GC_wnt == FALSE + || (hK32 = GetModuleHandleA("kernel32.dll")) == (HMODULE)0 + || (signalObjectAndWait_func = (SignalObjectAndWait_type) + GetProcAddress(hK32, "SignalObjectAndWait")) == 0 +# endif + ) { + /* Disable parallel marking. */ + GC_parallel = FALSE; + GC_markers = 1; + } else { +# ifndef GC_PTHREADS + /* Initialize Win32 event objects for parallel marking. */ + mark_mutex_event = CreateEventA(NULL /* attrs */, + FALSE /* isManualReset */, + FALSE /* initialState */, NULL /* name */); + builder_cv = CreateEventA(NULL /* attrs */, + TRUE /* isManualReset */, + FALSE /* initialState */, NULL /* name */); + mark_cv = CreateEventA(NULL /* attrs */, TRUE /* isManualReset */, + FALSE /* initialState */, NULL /* name */); + if (mark_mutex_event == (HANDLE)0 || builder_cv == (HANDLE)0 + || mark_cv == (HANDLE)0) + ABORT("CreateEvent() failed"); +# endif + GC_parallel = TRUE; + /* Disable true incremental collection, but generational is OK. */ + GC_time_limit = GC_TIME_UNLIMITED; + } + } + + if (GC_print_stats) { + GC_log_printf("Number of marker threads = %ld\n", GC_markers); + } +# endif /* PARALLEL_MARK */ + + GC_ASSERT(0 == GC_lookup_thread_inner(GC_main_thread)); + GC_register_my_thread_inner(&sb, GC_main_thread); + +# ifdef PARALLEL_MARK + /* If we are using a parallel marker, actually start helper threads. */ + if (GC_parallel) start_mark_threads(); +# endif } -#ifdef CYGWIN32 +#ifdef GC_PTHREADS struct start_info { void *(*start_routine)(void *); @@ -595,28 +1872,51 @@ struct start_info { int GC_pthread_join(pthread_t pthread_id, void **retval) { int result; - int i; - GC_thread me; + GC_thread joinee; # if DEBUG_CYGWIN_THREADS - GC_printf3("thread 0x%x(0x%x) is joining thread 0x%x.\n", - (int)pthread_self(), GetCurrentThreadId(), (int)pthread_id); + GC_printf("thread 0x%x(0x%x) is joining thread 0x%x.\n", + (int)pthread_self(), (int)GetCurrentThreadId(), + (int)pthread_id); +# endif +# if DEBUG_WIN32_PTHREADS + GC_printf("thread 0x%x(0x%x) is joining thread 0x%x.\n", + (int)(pthread_self()).p, (int)GetCurrentThreadId(), + pthread_id.p); # endif + if (!parallel_initialized) GC_init_parallel(); /* Thread being joined might not have registered itself yet. */ /* After the join,thread id may have been recycled. */ /* FIXME: It would be better if this worked more like */ /* pthread_support.c. */ - while ((me = GC_lookup_thread(pthread_id)) == 0) Sleep(10); +# ifndef GC_WIN32_PTHREADS + while ((joinee = GC_lookup_pthread(pthread_id)) == 0) Sleep(10); +# endif result = pthread_join(pthread_id, retval); - GC_delete_gc_thread(me); +# ifdef GC_WIN32_PTHREADS + /* win32_pthreads id are unique */ + joinee = GC_lookup_pthread(pthread_id); +# endif + + if (!GC_win32_dll_threads) { + LOCK(); + GC_delete_gc_thread(joinee); + UNLOCK(); + } /* otherwise dllmain handles it. */ # if DEBUG_CYGWIN_THREADS - GC_printf3("thread 0x%x(0x%x) completed join with thread 0x%x.\n", - (int)pthread_self(), GetCurrentThreadId(), (int)pthread_id); + GC_printf("thread 0x%x(0x%x) completed join with thread 0x%x.\n", + (int)pthread_self(), (int)GetCurrentThreadId(), + (int)pthread_id); +# endif +# if DEBUG_WIN32_PTHREADS + GC_printf("thread 0x%x(0x%x) completed join with thread 0x%x.\n", + (int)(pthread_self()).p, (int)GetCurrentThreadId(), + pthread_id.p); # endif return result; @@ -633,8 +1933,11 @@ GC_pthread_create(pthread_t *new_thread, int result; struct start_info * si; - if (!GC_is_initialized) GC_init(); + if (!parallel_initialized) GC_init_parallel(); /* make sure GC is initialized (i.e. main thread is attached) */ + if (GC_win32_dll_threads) { + return pthread_create(new_thread, attr, start_routine, arg); + } /* This is otherwise saved only in an area mmapped by the thread */ /* library, which isn't visible to the collector. */ @@ -650,10 +1953,15 @@ GC_pthread_create(pthread_t *new_thread, } # if DEBUG_CYGWIN_THREADS - GC_printf2("About to create a thread from 0x%x(0x%x)\n", - (int)pthread_self(), GetCurrentThreadId); + GC_printf("About to create a thread from 0x%x(0x%x)\n", + (int)pthread_self(), (int)GetCurrentThreadId); # endif - result = pthread_create(new_thread, attr, GC_start_routine, si); +# if DEBUG_WIN32_PTHREADS + GC_printf("About to create a thread from 0x%x(0x%x)\n", + (int)(pthread_self()).p, (int)GetCurrentThreadId()); +# endif + GC_need_to_lock = TRUE; + result = pthread_create(new_thread, attr, GC_pthread_start, si); if (result) { /* failure */ GC_free(si); @@ -662,22 +1970,27 @@ GC_pthread_create(pthread_t *new_thread, return(result); } -void * GC_start_routine(void * arg) +STATIC void * GC_CALLBACK GC_pthread_start_inner(struct GC_stack_base *sb, + void * arg) { struct start_info * si = arg; void * result; void *(*start)(void *); void *start_arg; - pthread_t pthread_id; + DWORD thread_id = GetCurrentThreadId(); + pthread_t pthread_id = pthread_self(); GC_thread me; - GC_bool detached; - int i; # if DEBUG_CYGWIN_THREADS - GC_printf2("thread 0x%x(0x%x) starting...\n",(int)pthread_self(), - GetCurrentThreadId()); + GC_printf("thread 0x%x(0x%x) starting...\n",(int)pthread_id, + (int)thread_id); +# endif +# if DEBUG_WIN32_PTHREADS + GC_printf("thread 0x%x(0x%x) starting...\n",(int) pthread_id.p, + (int)thread_id); # endif + GC_ASSERT(!GC_win32_dll_threads); /* If a GC occurs before the thread is registered, that GC will */ /* ignore this thread. That's fine, since it will block trying to */ /* acquire the allocation lock, and won't yet hold interesting */ @@ -685,40 +1998,57 @@ void * GC_start_routine(void * arg) LOCK(); /* We register the thread here instead of in the parent, so that */ /* we don't need to hold the allocation lock during pthread_create. */ - me = GC_new_thread(); + me = GC_register_my_thread_inner(sb, thread_id); + SET_PTHREAD_MAP_CACHE(pthread_id, thread_id); UNLOCK(); start = si -> start_routine; start_arg = si -> arg; if (si-> detached) me -> flags |= DETACHED; - me -> pthread_id = pthread_id = pthread_self(); + me -> pthread_id = pthread_id; GC_free(si); /* was allocated uncollectable */ pthread_cleanup_push(GC_thread_exit_proc, (void *)me); result = (*start)(start_arg); me -> status = result; - pthread_cleanup_pop(0); + pthread_cleanup_pop(1); # if DEBUG_CYGWIN_THREADS - GC_printf2("thread 0x%x(0x%x) returned from start routine.\n", - (int)pthread_self(),GetCurrentThreadId()); + GC_printf("thread 0x%x(0x%x) returned from start routine.\n", + (int)pthread_self(),(int)GetCurrentThreadId()); +# endif +# if DEBUG_WIN32_PTHREADS + GC_printf("thread 0x%x(0x%x) returned from start routine.\n", + (int)(pthread_self()).p, (int)GetCurrentThreadId()); # endif return(result); } -void GC_thread_exit_proc(void *arg) +STATIC void * GC_pthread_start(void * arg) +{ + return GC_call_with_stack_base(GC_pthread_start_inner, arg); +} + +STATIC void GC_thread_exit_proc(void *arg) { GC_thread me = (GC_thread)arg; - int i; + GC_ASSERT(!GC_win32_dll_threads); # if DEBUG_CYGWIN_THREADS - GC_printf2("thread 0x%x(0x%x) called pthread_exit().\n", - (int)pthread_self(),GetCurrentThreadId()); + GC_printf("thread 0x%x(0x%x) called pthread_exit().\n", + (int)pthread_self(),(int)GetCurrentThreadId()); +# endif +# if DEBUG_WIN32_PTHREADS + GC_printf("thread 0x%x(0x%x) called pthread_exit().\n", + (int)(pthread_self()).p,(int)GetCurrentThreadId()); # endif LOCK(); +# if defined(THREAD_LOCAL_ALLOC) + GC_destroy_thread_local(&(me->tlfs)); +# endif if (me -> flags & DETACHED) { GC_delete_thread(GetCurrentThreadId()); } else { @@ -728,18 +2058,23 @@ void GC_thread_exit_proc(void *arg) UNLOCK(); } +#ifndef GC_WIN32_PTHREADS +/* win32 pthread does not support sigmask */ /* nothing required here... */ int GC_pthread_sigmask(int how, const sigset_t *set, sigset_t *oset) { + if (!parallel_initialized) GC_init_parallel(); return pthread_sigmask(how, set, oset); } +#endif int GC_pthread_detach(pthread_t thread) { int result; GC_thread thread_gc_id; + if (!parallel_initialized) GC_init_parallel(); LOCK(); - thread_gc_id = GC_lookup_thread(thread); + thread_gc_id = GC_lookup_pthread(thread); UNLOCK(); result = pthread_detach(thread); if (result == 0) { @@ -754,41 +2089,76 @@ int GC_pthread_detach(pthread_t thread) return result; } -#else /* !CYGWIN32 */ +#else /* !GC_PTHREADS */ /* - * We avoid acquiring locks here, since this doesn't seem to be preemptable. - * Pontus Rydin suggests wrapping the thread start routine instead. + * We avoid acquiring locks here, since this doesn't seem to be preemptible. + * This may run with an uninitialized collector, in which case we don't do much. + * This implies that no threads other than the main one should be created + * with an uninitialized collector. (The alternative of initializing + * the collector here seems dangerous, since DllMain is limited in what it + * can do.) */ #ifdef GC_DLL +/*ARGSUSED*/ BOOL WINAPI DllMain(HINSTANCE inst, ULONG reason, LPVOID reserved) { + struct GC_stack_base sb; + DWORD thread_id; +# ifdef GC_ASSERTIONS + int sb_result; +# endif + static int entry_count = 0; + + if (parallel_initialized && !GC_win32_dll_threads) return TRUE; + switch (reason) { - case DLL_PROCESS_ATTACH: - GC_init(); /* Force initialization before thread attach. */ - /* fall through */ - case DLL_THREAD_ATTACH: - GC_ASSERT(GC_thr_initialized); - if (GC_main_thread != GetCurrentThreadId()) { - GC_new_thread(); + case DLL_THREAD_ATTACH: +# ifdef PARALLEL_MARK + /* Don't register marker threads. */ + if (GC_parallel) { + /* We could reach here only if parallel_initialized == FALSE. */ + break; + } +# endif + GC_ASSERT(entry_count == 0 || parallel_initialized); + ++entry_count; /* and fall through: */ + case DLL_PROCESS_ATTACH: + /* This may run with the collector uninitialized. */ + thread_id = GetCurrentThreadId(); + if (parallel_initialized && GC_main_thread != thread_id) { + /* Don't lock here. */ +# ifdef GC_ASSERTIONS + sb_result = +# endif + GC_get_stack_base(&sb); + GC_ASSERT(sb_result == GC_SUCCESS); +# if defined(THREAD_LOCAL_ALLOC) || defined(PARALLEL_MARK) + ABORT("Cannot initialize thread local cache from DllMain"); +# endif + GC_register_my_thread_inner(&sb, thread_id); } /* o.w. we already did it during GC_thr_init(), called by GC_init() */ break; - case DLL_THREAD_DETACH: + case DLL_THREAD_DETACH: + /* We are hopefully running in the context of the exiting thread. */ + GC_ASSERT(parallel_initialized); + if (!GC_win32_dll_threads) return TRUE; GC_delete_thread(GetCurrentThreadId()); break; - case DLL_PROCESS_DETACH: + case DLL_PROCESS_DETACH: { int i; + int my_max; - LOCK(); - for (i = 0; i <= GC_get_max_thread_index(); ++i) + if (!GC_win32_dll_threads) return TRUE; + my_max = (int)GC_get_max_thread_index(); + for (i = 0; i <= my_max; ++i) { - if (thread_table[i].in_use) - GC_delete_gc_thread(thread_table + i); + if (AO_load(&(dll_thread_table[i].in_use))) + GC_delete_gc_thread(dll_thread_table + i); } - UNLOCK(); GC_deinit(); DeleteCriticalSection(&GC_allocate_ml); @@ -799,8 +2169,98 @@ BOOL WINAPI DllMain(HINSTANCE inst, ULONG reason, LPVOID reserved) return TRUE; } #endif /* GC_DLL */ -#endif /* !CYGWIN32 */ +#endif /* !GC_PTHREADS */ # endif /* !MSWINCE */ +/* Perform all initializations, including those that */ +/* may require allocation. */ +/* Called without allocation lock. */ +/* Must be called before a second thread is created. */ +void GC_init_parallel(void) +{ + if (parallel_initialized) return; + parallel_initialized = TRUE; + /* GC_init() calls us back, so set flag first. */ + + if (!GC_is_initialized) GC_init(); + if (GC_win32_dll_threads) { + GC_need_to_lock = TRUE; + /* Cannot intercept thread creation. Hence we don't know if */ + /* other threads exist. However, client is not allowed to */ + /* create other threads before collector initialization. */ + /* Thus it's OK not to lock before this. */ + } + /* Initialize thread local free lists if used. */ +# if defined(THREAD_LOCAL_ALLOC) + LOCK(); + GC_init_thread_local(&GC_lookup_thread_inner(GetCurrentThreadId())->tlfs); + UNLOCK(); +# endif +} + +#if defined(USE_PTHREAD_LOCKS) + /* Support for pthread locking code. */ + /* Pthread_mutex_try_lock may not win here, */ + /* due to builtin support for spinning first? */ + +volatile GC_bool GC_collecting = 0; + /* A hint that we're in the collector and */ + /* holding the allocation lock for an */ + /* extended period. */ + +void GC_lock(void) +{ + pthread_mutex_lock(&GC_allocate_ml); +} +#endif /* USE_PTHREAD ... */ + +# if defined(THREAD_LOCAL_ALLOC) + +/* Add thread-local allocation support. Microsoft uses __declspec(thread) */ + +/* We must explicitly mark ptrfree and gcj free lists, since the free */ +/* list links wouldn't otherwise be found. We also set them in the */ +/* normal free lists, since that involves touching less memory than if */ +/* we scanned them normally. */ +void GC_mark_thread_local_free_lists(void) +{ + int i; + GC_thread p; + + for (i = 0; i < THREAD_TABLE_SZ; ++i) { + for (p = GC_threads[i]; 0 != p; p = p -> next) { +# ifdef DEBUG_THREADS + GC_printf("Marking thread locals for 0x%x\n", p -> id); +# endif + GC_mark_thread_local_fls_for(&(p->tlfs)); + } + } +} + +#if defined(GC_ASSERTIONS) + void GC_check_tls_for(GC_tlfs p); +# if defined(USE_CUSTOM_SPECIFIC) + void GC_check_tsd_marks(tsd *key); +# endif + /* Check that all thread-local free-lists are completely marked. */ + /* also check that thread-specific-data structures are marked. */ + void GC_check_tls(void) { + int i; + GC_thread p; + + for (i = 0; i < THREAD_TABLE_SZ; ++i) { + for (p = GC_threads[i]; 0 != p; p = p -> next) { + GC_check_tls_for(&(p->tlfs)); + } + } +# if defined(USE_CUSTOM_SPECIFIC) + if (GC_thread_key != 0) + GC_check_tsd_marks(GC_thread_key); +# endif + } +#endif /* GC_ASSERTIONS */ + +#endif /* THREAD_LOCAL_ALLOC ... */ + #endif /* GC_WIN32_THREADS */