This patchset implements a fast version of ThreadLocal<T> by reusing the code from
the implementation of the ThreadStatic attribute. The tls slots are cleaned up
on Dispose (or Finalize) instead of during appdomain shutdown.
Since C# doesn't allow us to define an icall that returns a T& or a T* type, a JIT
hack is used: every load field address instruction for the tlsdata field in
ThreadLocal<T> is intercepted and transformed into a tls access by using the tls_offset
field from the same ThreadLocal<T> instance. So, while the C# code looks like
it accesses and changes an instance field, under the hood it will access the tls
storage.
The end result is that this is 5-6 times faster than the previous code and 10-15%
faster than using Thread.GetData () (when used with references).
This implementation also fixes a number of issues with disposing and with
per-thread initializers throwing an exception.
#endif
// Looks up the object associated with the current thread
+ // this is called by the JIT directly, too
[MethodImplAttribute(MethodImplOptions.InternalCall)]
private extern static InternalThread CurrentInternalThread_internal();
+ // Runtime icall (registered as mono_thread_alloc_tls): allocates room in the thread
+ // local area for an instance of the given struct type and returns its tls offset.
+ [MethodImplAttribute(MethodImplOptions.InternalCall)]
+ internal extern static uint AllocTlsData (Type type);
+
+ // Runtime icall (registered as mono_thread_destroy_tls): releases the slot previously
+ // returned by AllocTlsData for the given offset.
+ [MethodImplAttribute(MethodImplOptions.InternalCall)]
+ internal extern static void DestroyTlsData (uint offset);
+
public static Thread CurrentThread {
[ReliabilityContract (Consistency.WillNotCorruptState, Cer.MayFail)]
get {
//
// Author:
// Jérémie "Garuma" Laval <jeremie.laval@gmail.com>
+// Rewritten by Paolo Molaro (lupus@ximian.com)
//
// Copyright (c) 2009 Jérémie "Garuma" Laval
//
[System.Diagnostics.DebuggerTypeProxy ("System.Threading.SystemThreading_ThreadLocalDebugView`1")]
public class ThreadLocal<T> : IDisposable
{
- readonly Func<T> valueFactory;
- LocalDataStoreSlot localStore;
- Exception cachedException;
-
- class DataSlotWrapper
- {
- public bool Creating;
- public bool Init;
- public Func<T> Getter;
+ struct TlsDatum {
+ internal sbyte state; /* 0 uninitialized, < 0 initializing, > 0 inited */
+ internal Exception cachedException; /* this is per-thread */
+ internal T data;
}
+
+ Func<T> valueFactory;
+ /* The tlsdata field is handled magically by the JIT
+ * It must be a struct and it is always accessed by ldflda: the JIT, instead of
+ * computing the address inside the instance, will return the address of the variable
+ * for the current thread (based on tls_offset). This magic wouldn't be needed if C#
+ * let us declare an icall with a TlsDatum& return type...
+ * For this same reason, we must check tls_offset for != 0 to make sure it's valid before accessing tlsdata
+ * The address of the tls var is cached per method at the first IL ldflda instruction, so care must be taken
+ * not to cause it to be conditionally executed.
+ */
+ uint tls_offset;
+ TlsDatum tlsdata;
- public ThreadLocal () : this (LazyInitializer.GetDefaultValueFactory<T>)
+ public ThreadLocal ()
{
+ tls_offset = Thread.AllocTlsData (typeof (TlsDatum));
}
- public ThreadLocal (Func<T> valueFactory)
+ public ThreadLocal (Func<T> valueFactory) : this ()
{
if (valueFactory == null)
throw new ArgumentNullException ("valueFactory");
-
- localStore = Thread.AllocateDataSlot ();
this.valueFactory = valueFactory;
}
-
+
public void Dispose ()
{
Dispose (true);
protected virtual void Dispose (bool disposing)
{
-
+ if (tls_offset != 0) {
+ uint o = tls_offset;
+ tls_offset = 0;
+ if (disposing)
+ valueFactory = null;
+ Thread.DestroyTlsData (o);
+ GC.SuppressFinalize (this);
+ }
+ }
+
+ ~ThreadLocal ()
+ {
+ Dispose (false);
}
public bool IsValueCreated {
get {
- ThrowIfNeeded ();
- return IsInitializedThreadLocal ();
+ if (tls_offset == 0)
+ throw new ObjectDisposedException ("ThreadLocal object");
+ /* ALERT! magic tlsdata JIT access redirects to TLS value instead of instance field */
+ return tlsdata.state > 0;
}
}
+		/*
+		 * Slow path of the Value getter, taken when the current thread's state is not > 0.
+		 * - rethrows the exception cached for this thread by an earlier failed factory call;
+		 * - throws InvalidOperationException if the factory is already running on this
+		 *   thread (state < 0), i.e. it accessed Value recursively;
+		 * - otherwise marks the slot as initializing, runs valueFactory (or uses default (T)
+		 *   when there is no factory), marks the slot as initialized and returns the value.
+		 * The first statement must keep touching tlsdata unconditionally: the JIT caches the
+		 * address of the tls var at the first ldflda of the method.
+		 */
+		T GetSlowPath () {
+			/* ALERT! magic tlsdata JIT access redirects to TLS value instead of instance field */
+			if (tlsdata.cachedException != null)
+				throw tlsdata.cachedException;
+			if (tlsdata.state < 0)
+				throw new InvalidOperationException ("The initialization function attempted to reference Value recursively");
+			/* negative state flags "initializing" so a recursive access is detected above */
+			tlsdata.state = -1;
+			if (valueFactory != null) {
+				try {
+					tlsdata.data = valueFactory ();
+				} catch (Exception ex) {
+					/* remember the failure for this thread only; state stays < 0, so
+					 * IsValueCreated keeps reporting false */
+					tlsdata.cachedException = ex;
+					/* "throw;" (not "throw ex;") keeps the factory's original stack trace */
+					throw;
+				}
+			} else {
+				tlsdata.data = default (T);
+			}
+			tlsdata.state = 1;
+			return tlsdata.data;
+		}
+
[System.Diagnostics.DebuggerBrowsableAttribute (System.Diagnostics.DebuggerBrowsableState.Never)]
public T Value {
get {
- ThrowIfNeeded ();
- return GetValueThreadLocal ();
+ if (tls_offset == 0)
+ throw new ObjectDisposedException ("ThreadLocal object");
+ /* ALERT! magic tlsdata JIT access redirects to TLS value instead of instance field */
+ if (tlsdata.state > 0)
+ return tlsdata.data;
+ return GetSlowPath ();
}
set {
- ThrowIfNeeded ();
-
- DataSlotWrapper w = GetWrapper ();
- w.Init = true;
- w.Getter = () => value;
+ if (tls_offset == 0)
+ throw new ObjectDisposedException ("ThreadLocal object");
+ /* ALERT! magic tlsdata JIT access redirects to TLS value instead of instance field */
+ tlsdata.state = 1;
+ tlsdata.data = value;
}
}
return string.Format ("[ThreadLocal: IsValueCreated={0}, Value={1}]", IsValueCreated, Value);
}
- T GetValueThreadLocal ()
- {
- DataSlotWrapper myWrapper = GetWrapper ();
- if (myWrapper.Creating)
- throw new InvalidOperationException ("The initialization function attempted to reference Value recursively");
-
- return myWrapper.Getter ();
- }
-
- bool IsInitializedThreadLocal ()
- {
- DataSlotWrapper myWrapper = GetWrapper ();
-
- return myWrapper.Init;
- }
-
- DataSlotWrapper GetWrapper ()
- {
- DataSlotWrapper myWrapper = (DataSlotWrapper)Thread.GetData (localStore);
- if (myWrapper == null) {
- myWrapper = DataSlotCreator ();
- Thread.SetData (localStore, myWrapper);
- }
-
- return myWrapper;
- }
-
- void ThrowIfNeeded ()
- {
- if (cachedException != null)
- throw cachedException;
- }
-
- DataSlotWrapper DataSlotCreator ()
- {
- DataSlotWrapper wrapper = new DataSlotWrapper ();
- Func<T> valSelector = valueFactory;
-
- wrapper.Getter = delegate {
- wrapper.Creating = true;
- try {
- T val = valSelector ();
- wrapper.Creating = false;
- wrapper.Init = true;
- wrapper.Getter = () => val;
- return val;
- } catch (Exception e) {
- cachedException = e;
- throw e;
- }
- };
-
- return wrapper;
- }
}
}
#endif
Assert.AreEqual (default (object), local2.Value);
}
+		/* Reading Value from a disposed ThreadLocal must raise ObjectDisposedException. */
+		[Test, ExpectedException (typeof (ObjectDisposedException))]
+		public void DisposedOnValueTest ()
+		{
+			var disposed = new ThreadLocal<int> ();
+			disposed.Dispose ();
+			int ignored = disposed.Value;
+		}
+
+		/* Querying IsValueCreated on a disposed ThreadLocal must raise ObjectDisposedException. */
+		[Test, ExpectedException (typeof (ObjectDisposedException))]
+		public void DisposedOnIsValueCreatedTest ()
+		{
+			var disposed = new ThreadLocal<int> ();
+			disposed.Dispose ();
+			bool ignored = disposed.IsValueCreated;
+		}
+
+ // Checks that an exception thrown by the value factory is seen on the thread that ran
+ // the factory, and that this thread's IsValueCreated stays false without throwing.
+ [Test]
+ public void PerThreadException ()
+ {
+ int callTime = 0;
+ // the factory succeeds on its first call (callTime == 0) and throws once callTime is 1
+ threadLocal = new ThreadLocal<int> (() => {
+ if (callTime == 1)
+ throw new ApplicationException ("foo");
+ Interlocked.Increment (ref callTime);
+ return 43;
+ });
+
+ Exception exception = null;
+
+ // first access on the test thread: the factory returns 43 and bumps callTime to 1
+ var foo = threadLocal.Value;
+ bool thread_value_created = false;
+ Assert.AreEqual (43, foo, "#3");
+ // second thread: its own factory invocation now throws; the exception is captured
+ Thread t = new Thread ((object o) => {
+ try {
+ var foo2 = threadLocal.Value;
+ } catch (Exception e) {
+ exception = e;
+ }
+ // should be false and not throw
+ thread_value_created = threadLocal.IsValueCreated;
+ });
+ t.Start ();
+ t.Join ();
+ Assert.AreEqual (false, thread_value_created, "#4");
+ Assert.IsNotNull (exception, "#5");
+ Assert.IsInstanceOfType (typeof (ApplicationException), exception, "#6");
+ }
+
void AssertThreadLocal ()
{
Assert.IsFalse (threadLocal.IsValueCreated, "#1");
typedef struct _MonoJitCodeHash MonoJitCodeHash;
+typedef struct _MonoTlsDataRecord MonoTlsDataRecord;
+/*
+ * Bookkeeping record for one slot handed out by mono_thread_alloc_tls ().
+ * The records of a domain are chained from MonoDomain.tlsrec_list.
+ */
+struct _MonoTlsDataRecord {
+ /* next record in the domain's singly linked list */
+ MonoTlsDataRecord *next;
+ /* offset returned by mono_alloc_special_static_data () for this slot */
+ guint32 tls_offset;
+ /* slot size as computed by mono_type_size (); passed back when the slot is freed */
+ guint32 size;
+};
+
struct _MonoDomain {
/*
* This lock must never be taken before the loader lock,
MonoMethod *private_invoke_method;
/* Used to store offsets of thread and context static fields */
GHashTable *special_static_fields;
+ MonoTlsDataRecord *tlsrec_list;
/*
* This must be a GHashTable, since these objects can't be finalized
* if the hashtable contains a GC visible reference to them.
mono_g_hash_table_destroy (domain->env);
domain->env = NULL;
+ if (domain->tlsrec_list) {
+ mono_thread_destroy_domain_tls (domain);
+ domain->tlsrec_list = NULL;
+ }
+
mono_reflection_cleanup_domain (domain);
if (domain->type_hash) {
ICALL_TYPE(THREAD, "System.Threading.Thread", THREAD_1)
ICALL(THREAD_1, "Abort_internal(System.Threading.InternalThread,object)", ves_icall_System_Threading_Thread_Abort)
+ICALL(THREAD_1aa, "AllocTlsData", mono_thread_alloc_tls)
ICALL(THREAD_1a, "ByteArrayToCurrentDomain(byte[])", ves_icall_System_Threading_Thread_ByteArrayToCurrentDomain)
ICALL(THREAD_1b, "ByteArrayToRootDomain(byte[])", ves_icall_System_Threading_Thread_ByteArrayToRootDomain)
ICALL(THREAD_2, "ClrState(System.Threading.InternalThread,System.Threading.ThreadState)", ves_icall_System_Threading_Thread_ClrState)
ICALL(THREAD_2a, "ConstructInternalThread", ves_icall_System_Threading_Thread_ConstructInternalThread)
ICALL(THREAD_3, "CurrentInternalThread_internal", mono_thread_internal_current)
+ICALL(THREAD_3a, "DestroyTlsData", mono_thread_destroy_tls)
ICALL(THREAD_4, "FreeLocalSlotValues", mono_thread_free_local_slot_values)
ICALL(THREAD_55, "GetAbortExceptionState", ves_icall_System_Threading_Thread_GetAbortExceptionState)
ICALL(THREAD_7, "GetDomainID", ves_icall_System_Threading_Thread_GetDomainID)
void
mono_class_compute_gc_descriptor (MonoClass *class) MONO_INTERNAL;
+gsize*
+mono_class_compute_bitmap (MonoClass *class, gsize *bitmap, int size, int offset, int *max_set, gboolean static_fields) MONO_INTERNAL;
+
MonoObject*
mono_object_xdomain_representation (MonoObject *obj, MonoDomain *target_domain, MonoObject **exc) MONO_INTERNAL;
return bitmap;
}
+/**
+ * mono_class_compute_bitmap:
+ *
+ * Mono internal function to compute a bitmap of reference fields in a class.
+ * Thin exported wrapper: all arguments are forwarded unchanged to
+ * compute_class_bitmap () and its result is returned as is.
+ */
+gsize*
+mono_class_compute_bitmap (MonoClass *class, gsize *bitmap, int size, int offset, int *max_set, gboolean static_fields)
+{
+ return compute_class_bitmap (class, bitmap, size, offset, max_set, static_fields);
+}
+
#if 0
/*
* similar to the above, but sets the bits in the bitmap for any non-ref field
void mono_thread_internal_reset_abort (MonoInternalThread *thread) MONO_INTERNAL;
void mono_alloc_special_static_data_free (GHashTable *special_static_fields) MONO_INTERNAL;
+void mono_special_static_data_free_slot (guint32 offset, guint32 size) MONO_INTERNAL;
+uint32_t mono_thread_alloc_tls (MonoReflectionType *type) MONO_INTERNAL;
+void mono_thread_destroy_tls (uint32_t tls_offset) MONO_INTERNAL;
+void mono_thread_destroy_domain_tls (MonoDomain *domain) MONO_INTERNAL;
void mono_thread_free_local_slot_values (int slot, MonoBoolean thread_local) MONO_INTERNAL;
void mono_thread_current_check_pending_interrupt (void) MONO_INTERNAL;
void mono_thread_get_stack_bounds (guint8 **staddr, size_t *stsize) MONO_INTERNAL;
}
static void
-do_free_special (gpointer key, gpointer value, gpointer data)
+do_free_special_slot (guint32 offset, guint32 size)
{
- MonoClassField *field = key;
- guint32 offset = GPOINTER_TO_UINT (value);
guint32 static_type = (offset & 0x80000000);
- gint32 align;
- guint32 size;
- size = mono_type_size (field->type, &align);
/*g_print ("free %s , size: %d, offset: %x\n", field->name, size, offset);*/
if (static_type == 0) {
TlsOffsetSize data;
}
}
+/*
+ * Per-entry callback (presumably driven by a hash table walk over the special static
+ * fields - verify against the caller): key is the MonoClassField, value is its encoded
+ * offset. The field's size is derived from its type and the slot is released through
+ * do_free_special_slot (). The data argument is unused.
+ */
+static void
+do_free_special (gpointer key, gpointer value, gpointer data)
+{
+	MonoClassField *field = key;
+	gint32 align;
+	guint32 size = mono_type_size (field->type, &align);
+
+	do_free_special_slot (GPOINTER_TO_UINT (value), size);
+}
+
void
mono_alloc_special_static_data_free (GHashTable *special_static_fields)
{
mono_threads_unlock ();
}
+/*
+ * Releases the special static slot described by offset and size: calls
+ * do_free_special_slot () while holding the threads lock.
+ */
+void
+mono_special_static_data_free_slot (guint32 offset, guint32 size)
+{
+ mono_threads_lock ();
+ do_free_special_slot (offset, size);
+ mono_threads_unlock ();
+}
+
+/*
+ * allocates room in the thread local area for storing an instance of the struct type
+ * the allocation is kept track of in domain->tlsrec_list.
+ * Returns the offset obtained from mono_alloc_special_static_data (); the record is
+ * registered in the current domain (mono_domain_get ()).
+ */
+uint32_t
+mono_thread_alloc_tls (MonoReflectionType *type)
+{
+ MonoDomain *domain = mono_domain_get ();
+ MonoClass *klass;
+ MonoTlsDataRecord *tlsrec;
+ int max_set = 0;
+ gsize *bitmap;
+ gsize default_bitmap [4] = {0};
+ uint32_t tls_offset;
+ guint32 size;
+ gint32 align;
+
+ klass = mono_class_from_mono_type (type->type);
+ /* TlsDatum is a struct, so we subtract the object header size offset */
+ bitmap = mono_class_compute_bitmap (klass, default_bitmap, sizeof (default_bitmap) * 8, - (int)(sizeof (MonoObject) / sizeof (gpointer)), &max_set, FALSE);
+ size = mono_type_size (type->type, &align);
+ tls_offset = mono_alloc_special_static_data (SPECIAL_STATIC_THREAD, size, align, bitmap, max_set);
+ /* the returned bitmap may be a different buffer than the one passed in (presumably heap allocated when default_bitmap is too small - TODO confirm) */
+ if (bitmap != default_bitmap)
+ g_free (bitmap);
+ /* remember offset and size so the slot can be found and freed later */
+ tlsrec = g_new0 (MonoTlsDataRecord, 1);
+ tlsrec->tls_offset = tls_offset;
+ tlsrec->size = size;
+ /* push the record at the head of the domain's list, under the domain lock */
+ mono_domain_lock (domain);
+ tlsrec->next = domain->tlsrec_list;
+ domain->tlsrec_list = tlsrec;
+ mono_domain_unlock (domain);
+ return tls_offset;
+}
+
+/*
+ * Unlinks the record for tls_offset from domain->tlsrec_list and releases the
+ * corresponding slot. Does nothing if domain has no record with that offset.
+ * The domain is passed explicitly so that the list being searched is always the
+ * list of the domain the caller is working on.
+ */
+static void
+destroy_tls (MonoDomain *domain, uint32_t tls_offset)
+{
+	MonoTlsDataRecord *prev = NULL;
+	MonoTlsDataRecord *cur;
+	guint32 size = 0;
+
+	mono_domain_lock (domain);
+	cur = domain->tlsrec_list;
+	while (cur) {
+		if (cur->tls_offset == tls_offset) {
+			if (prev)
+				prev->next = cur->next;
+			else
+				domain->tlsrec_list = cur->next;
+			size = cur->size;
+			g_free (cur);
+			break;
+		}
+		prev = cur;
+		cur = cur->next;
+	}
+	mono_domain_unlock (domain);
+	/* size stays 0 when no record matched; the slot is freed after the domain lock
+	 * is dropped (mono_special_static_data_free_slot takes the threads lock itself) */
+	if (size)
+		mono_special_static_data_free_slot (tls_offset, size);
+}
+
+/*
+ * Releases the slot at tls_offset that was allocated in the current domain
+ * (mono_domain_get ()). Registered as the Thread.DestroyTlsData icall.
+ */
+void
+mono_thread_destroy_tls (uint32_t tls_offset)
+{
+	destroy_tls (mono_domain_get (), tls_offset);
+}
+
+/*
+ * This is just to ensure cleanup: the finalizers should have taken care, so this is not perf-critical.
+ * The records are removed from the domain passed in, not from the current domain:
+ * looking them up in mono_domain_get () would never empty domain->tlsrec_list (and so
+ * never terminate) when a domain other than the current one is being torn down.
+ */
+void
+mono_thread_destroy_domain_tls (MonoDomain *domain)
+{
+	while (domain->tlsrec_list)
+		destroy_tls (domain, domain->tlsrec_list->tls_offset);
+}
+
static MonoClassField *local_slots = NULL;
typedef struct {
return supported_tail_call;
}
+/*
+ * Returns whether field is the "tlsdata" field of the ThreadLocal`1 class defined in
+ * corlib. ldflda on that field is redirected by the JIT to the thread local value
+ * selected by the tls_offset field; mono_method_to_ir rejects any other kind of
+ * access to it.
+ */
+static gboolean
+is_magic_tls_access (MonoClassField *field)
+{
+	/* checks are evaluated left to right: field name, declaring class name, image */
+	if (strcmp (field->name, "tlsdata") != 0 || strcmp (field->parent->name, "ThreadLocal`1") != 0)
+		return FALSE;
+	return field->parent->image == mono_defaults.corlib;
+}
+
+/* emits the code needed to access a managed tls var (like ThreadStatic)
+ * with the value of the tls offset in offset_reg. thread_ins represents the MonoInternalThread
+ * pointer for the current thread.
+ * Returns the MonoInst* representing the address of the tls var.
+ */
+static MonoInst*
+emit_managed_static_data_access (MonoCompile *cfg, MonoInst *thread_ins, int offset_reg)
+{
+ MonoInst *addr;
+ int static_data_reg, array_reg, dreg;
+ int offset2_reg, idx_reg;
+ // inlined access to the tls data
+ // idx = (offset >> 24) - 1;
+ // return ((char*) thread->static_data [idx]) + (offset & 0xffffff);
+ /* static_data_reg = thread->static_data */
+ static_data_reg = alloc_ireg (cfg);
+ MONO_EMIT_NEW_LOAD_MEMBASE (cfg, static_data_reg, thread_ins->dreg, G_STRUCT_OFFSET (MonoInternalThread, static_data));
+ /* idx_reg = ((offset >> 24) - 1), then scaled by the pointer size (shift by 3 or 2) */
+ idx_reg = alloc_ireg (cfg);
+ MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, idx_reg, offset_reg, 24);
+ MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISUB_IMM, idx_reg, idx_reg, 1);
+ MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, idx_reg, idx_reg, sizeof (gpointer) == 8 ? 3 : 2);
+ /* static_data_reg now points at the static_data [idx] entry, which is loaded into array_reg */
+ MONO_EMIT_NEW_BIALU (cfg, OP_PADD, static_data_reg, static_data_reg, idx_reg);
+ array_reg = alloc_ireg (cfg);
+ MONO_EMIT_NEW_LOAD_MEMBASE (cfg, array_reg, static_data_reg, 0);
+ /* offset2_reg = offset & 0xffffff, added to the loaded pointer to form the result */
+ offset2_reg = alloc_ireg (cfg);
+ MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IAND_IMM, offset2_reg, offset_reg, 0xffffff);
+ dreg = alloc_ireg (cfg);
+ EMIT_NEW_BIALU (cfg, addr, OP_PADD, dreg, array_reg, offset2_reg);
+ return addr;
+}
+
+/*
+ * redirect access to the tlsdata field to the tls var given by the tls_offset field.
+ * this address is cached per-method in cached_tls_addr.
+ * thread_local is the instruction producing the ThreadLocal<T> instance whose
+ * tls_offset field is read. Returns a load of the temp holding the tls var address.
+ * NOTE(review): unlike the regular ldflda path in mono_method_to_ir, no explicit null
+ * check on thread_local is emitted here - confirm this is intended.
+ */
+static MonoInst*
+create_magic_tls_access (MonoCompile *cfg, MonoClassField *tls_field, MonoInst **cached_tls_addr, MonoInst *thread_local)
+{
+ MonoInst *load, *addr, *temp, *store, *thread_ins;
+ MonoClassField *offset_field;
+
+ /* address already computed earlier in this method: just reload it from the temp */
+ if (*cached_tls_addr) {
+ EMIT_NEW_TEMPLOAD (cfg, addr, (*cached_tls_addr)->inst_c0);
+ return addr;
+ }
+ thread_ins = mono_get_thread_intrinsic (cfg);
+ offset_field = mono_class_get_field_from_name (tls_field->parent, "tls_offset");
+
+ /* load the tls_offset field from the ThreadLocal<T> instance */
+ EMIT_NEW_LOAD_MEMBASE_TYPE (cfg, load, offset_field->type, thread_local->dreg, offset_field->offset);
+ /* get the current thread: use the intrinsic when one is returned, otherwise call the managed icall */
+ if (thread_ins) {
+ MONO_ADD_INS (cfg->cbb, thread_ins);
+ } else {
+ MonoMethod *thread_method;
+ thread_method = mono_class_get_method_from_name (mono_get_thread_class(), "CurrentInternalThread_internal", 0);
+ thread_ins = mono_emit_method_call (cfg, thread_method, NULL, NULL);
+ }
+ addr = emit_managed_static_data_access (cfg, thread_ins, load->dreg);
+ addr->klass = mono_class_from_mono_type (tls_field->type);
+ addr->type = STACK_MP;
+ /* store the computed address in a new local and remember it for later accesses in this method */
+ *cached_tls_addr = temp = mono_compile_create_var (cfg, type_from_stack_type (addr), OP_LOCAL);
+ EMIT_NEW_TEMPSTORE (cfg, store, temp->inst_c0, addr);
+
+ EMIT_NEW_TEMPLOAD (cfg, addr, temp->inst_c0);
+ return addr;
+}
+
/*
* mono_method_to_ir:
*
int context_used;
gboolean init_locals, seq_points, skip_dead_blocks;
gboolean disable_inline;
+ MonoInst *cached_tls_addr = NULL;
disable_inline = is_jit_optimizer_disabled (method);
FIELD_ACCESS_FAILURE;
mono_class_init (klass);
+ if (*ip != CEE_LDFLDA && is_magic_tls_access (field))
+ UNVERIFIED;
/* XXX this is technically required but, so far (SL2), no [SecurityCritical] types (not many exists) have
any visible *instance* field (in fact there's a single case for a static field in Marshal) XXX
if (mono_security_get_mode () == MONO_SECURITY_MODE_CORE_CLR)
}
if (*ip == CEE_LDFLDA) {
- if (sp [0]->type == STACK_OBJ) {
- MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, sp [0]->dreg, 0);
- MONO_EMIT_NEW_COND_EXC (cfg, EQ, "NullReferenceException");
- }
+ if (is_magic_tls_access (field)) {
+ ins = sp [0];
+ *sp++ = create_magic_tls_access (cfg, field, &cached_tls_addr, ins);
+ } else {
+ if (sp [0]->type == STACK_OBJ) {
+ MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, sp [0]->dreg, 0);
+ MONO_EMIT_NEW_COND_EXC (cfg, EQ, "NullReferenceException");
+ }
- dreg = alloc_ireg_mp (cfg);
+ dreg = alloc_ireg_mp (cfg);
- EMIT_NEW_BIALU_IMM (cfg, ins, OP_PADD_IMM, dreg, sp [0]->dreg, foffset);
- ins->klass = mono_class_from_mono_type (field->type);
- ins->type = STACK_MP;
- *sp++ = ins;
+ EMIT_NEW_BIALU_IMM (cfg, ins, OP_PADD_IMM, dreg, sp [0]->dreg, foffset);
+ ins->klass = mono_class_from_mono_type (field->type);
+ ins->type = STACK_MP;
+ *sp++ = ins;
+ }
} else {
MonoInst *load;