Implemented fast version of ThreadLocal<T>.
author Paolo Molaro <lupus@ximian.com>
Tue, 1 Mar 2011 15:31:02 +0000 (16:31 +0100)
committer Paolo Molaro <lupus@ximian.com>
Tue, 1 Mar 2011 15:43:48 +0000 (16:43 +0100)
This patchset implements a fast version of ThreadLocal<T> by reusing the code from
the implementation of the ThreadStatic attribute. The tls slots are cleaned up
on Dispose (or Finalize) instead of during appdomain shutdown.
Since C# doesn't allow us to define an icall that returns a T& or a T* type, a JIT
hack is used: every load field address instruction for the tlsdata field in
ThreadLocal<T> is intercepted and transformed into a tls access by using the tls_offset
field from the same ThreadLocal<T> instance. So, while the C# code looks like
it accesses and changes an instance field, under the hood it will access the tls
storage.
The end result is that this is 5-6 times faster than the previous code, and 10-15%
faster than using Thread.GetData () (when used with references).
This implementation also fixes a number of issues with disposing and with
per-thread initializers throwing an exception.

mcs/class/corlib/System.Threading/Thread.cs
mcs/class/corlib/System.Threading/ThreadLocal.cs
mcs/class/corlib/Test/System.Threading/ThreadLocalTests.cs
mono/metadata/domain-internals.h
mono/metadata/domain.c
mono/metadata/icall-def.h
mono/metadata/object-internals.h
mono/metadata/object.c
mono/metadata/threads-types.h
mono/metadata/threads.c
mono/mini/method-to-ir.c

index 8da212496c33b99e4d888e83de42815ebb88d0d9..c39ab460e5f07e800a9563368a7fdf26435a1b39 100644 (file)
@@ -303,9 +303,16 @@ namespace System.Threading {
 #endif
 
                // Looks up the object associated with the current thread
+               // this is called by the JIT directly, too
                [MethodImplAttribute(MethodImplOptions.InternalCall)]
                private extern static InternalThread CurrentInternalThread_internal();
 
+               // Reserves a thread-local slot laid out for 'type' and returns its offset.
+               // Bound to mono_thread_alloc_tls in the runtime's icall table.
+               [MethodImplAttribute(MethodImplOptions.InternalCall)]
+               internal extern static uint AllocTlsData (Type type);
+
+               // Releases a slot previously returned by AllocTlsData.
+               // Bound to mono_thread_destroy_tls in the runtime's icall table.
+               [MethodImplAttribute(MethodImplOptions.InternalCall)]
+               internal extern static void DestroyTlsData (uint offset);
+
                public static Thread CurrentThread {
                        [ReliabilityContract (Consistency.WillNotCorruptState, Cer.MayFail)]
                        get {
index 0ee5247e2241613994fd293bf588dabfd9780af0..63187fc9ec607be30b8a6575e489d4d8259d33fe 100644 (file)
@@ -3,6 +3,7 @@
 //  
 // Author:
 //       Jérémie "Garuma" Laval <jeremie.laval@gmail.com>
+//       Rewritten by Paolo Molaro (lupus@ximian.com)
 // 
 // Copyright (c) 2009 Jérémie "Garuma" Laval
 // 
@@ -37,30 +38,37 @@ namespace System.Threading
        [System.Diagnostics.DebuggerTypeProxy ("System.Threading.SystemThreading_ThreadLocalDebugView`1")]
        public class ThreadLocal<T> : IDisposable
        {
-               readonly Func<T> valueFactory;
-               LocalDataStoreSlot localStore;
-               Exception cachedException;
-               
-               class DataSlotWrapper
-               {
-                       public bool Creating;
-                       public bool Init;
-                       public Func<T> Getter;
+               /* Per-thread record; its type is what the constructor passes to Thread.AllocTlsData. */
+               struct TlsDatum {
+                       internal sbyte state; /* 0 uninitialized, < 0 initializing, > 0 inited */
+                       internal Exception cachedException; /* this is per-thread */
+                       internal T data;
                }
+
+               Func<T> valueFactory;
+               /* The tlsdata field is handled magically by the JIT
+                * It must be a struct and it is always accessed by ldflda: the JIT, instead of
+                * computing the address inside the instance, will return the address of the variable
+                * for the current thread (based on tls_offset). This magic wouldn't be needed if C#
+                * let us declare an icall with a TlsDatum& return type...
+                * For this same reason, we must check tls_offset for != 0 to make sure it's valid before accessing tlsdata
+                * The address of the tls var is cached per method at the first IL ldflda instruction, so care must be taken
+                * not to cause it to be conditionally executed.
+                */
+               uint tls_offset;
+               TlsDatum tlsdata;
                
-               public ThreadLocal () : this (LazyInitializer.GetDefaultValueFactory<T>)
+               /* Creates an instance without a factory: the first read of Value on a thread yields default (T). */
+               public ThreadLocal ()
                {
+                       tls_offset = Thread.AllocTlsData (typeof (TlsDatum));
                }
 
-               public ThreadLocal (Func<T> valueFactory)
+               /*
+                * Creates an instance whose per-thread value is produced by valueFactory.
+                * Throws ArgumentNullException when valueFactory is null.
+                */
+               public ThreadLocal (Func<T> valueFactory) : this ()
                {
                        if (valueFactory == null)
                                throw new ArgumentNullException ("valueFactory");
-                       
-                       localStore = Thread.AllocateDataSlot ();
                        this.valueFactory = valueFactory;
                }
-               
+
                public void Dispose ()
                {
                        Dispose (true);
@@ -68,28 +76,67 @@ namespace System.Threading
                
+               /*
+                * Releases the TLS slot. tls_offset is zeroed before the slot is destroyed,
+                * so a later call finds tls_offset == 0 and does nothing.
+                * Called with disposing == true from Dispose () and false from the finalizer;
+                * valueFactory is only cleared in the first case.
+                */
                protected virtual void Dispose (bool disposing)
                {
-                       
+                       if (tls_offset != 0) {
+                               uint o = tls_offset;
+                               tls_offset = 0;
+                               if (disposing)
+                                       valueFactory = null;
+                               Thread.DestroyTlsData (o);
+                               GC.SuppressFinalize (this);
+                       }
+               }
+
+               /* Finalizer: frees the TLS slot if Dispose () was never called. */
+               ~ThreadLocal ()
+               {
+                       Dispose (false);
                }
                
+               /*
+                * True when the calling thread's state is > 0 (value stored).
+                * Throws ObjectDisposedException once tls_offset has been reset to 0.
+                */
                public bool IsValueCreated {
                        get {
-                               ThrowIfNeeded ();
-                               return IsInitializedThreadLocal ();
+                               if (tls_offset == 0)
+                                       throw new ObjectDisposedException ("ThreadLocal object");
+                               /* ALERT! magic tlsdata JIT access redirects to TLS value instead of instance field */
+                               return tlsdata.state > 0;
                        }
                }
 
+               /*
+                * Slow path of the Value getter, taken whenever the calling thread's state is not > 0.
+                * Rethrows the exception cached for this thread, rejects a recursive read made while
+                * the factory is still running, and otherwise produces, stores and returns the value
+                * (default (T) when there is no factory).
+                */
+               T GetSlowPath () {
+                       /* ALERT! magic tlsdata JIT access redirects to TLS value instead of instance field */
+                       if (tlsdata.cachedException != null)
+                               throw tlsdata.cachedException;
+                       if (tlsdata.state < 0)
+                               throw new InvalidOperationException ("The initialization function attempted to reference Value recursively");
+                       tlsdata.state = -1;
+                       if (valueFactory != null) {
+                               try {
+                                       tlsdata.data = valueFactory ();
+                               } catch (Exception ex) {
+                                       /* state stays < 0: later reads on this thread hit the cachedException check above */
+                                       tlsdata.cachedException = ex;
+                                       /* bare rethrow: 'throw ex' would overwrite the stack trace recorded inside the factory */
+                                       throw;
+                               }
+                       } else {
+                               tlsdata.data = default (T);
+                       }
+                       tlsdata.state = 1;
+                       return tlsdata.data;
+               }
+
+               /*
+                * Value for the calling thread.
+                * get: returns the stored value when state > 0, otherwise defers to GetSlowPath ().
+                * set: marks the thread's datum as initialized and stores the value.
+                * Both throw ObjectDisposedException once tls_offset has been reset to 0.
+                */
                [System.Diagnostics.DebuggerBrowsableAttribute (System.Diagnostics.DebuggerBrowsableState.Never)]
                public T Value {
                        get {
-                               ThrowIfNeeded ();
-                               return GetValueThreadLocal ();
+                               if (tls_offset == 0)
+                                       throw new ObjectDisposedException ("ThreadLocal object");
+                               /* ALERT! magic tlsdata JIT access redirects to TLS value instead of instance field */
+                               if (tlsdata.state > 0)
+                                       return tlsdata.data;
+                               return GetSlowPath ();
                        }
                        set {
-                               ThrowIfNeeded ();
-
-                               DataSlotWrapper w = GetWrapper ();
-                               w.Init = true;
-                               w.Getter = () => value;
+                               if (tls_offset == 0)
+                                       throw new ObjectDisposedException ("ThreadLocal object");
+                               /* ALERT! magic tlsdata JIT access redirects to TLS value instead of instance field */
+                               tlsdata.state = 1;
+                               tlsdata.data = value;
                        }
                }
                
@@ -98,60 +145,6 @@ namespace System.Threading
                        return string.Format ("[ThreadLocal: IsValueCreated={0}, Value={1}]", IsValueCreated, Value);
                }
                
-               T GetValueThreadLocal ()
-               {
-                       DataSlotWrapper myWrapper = GetWrapper ();
-                       if (myWrapper.Creating)
-                               throw new InvalidOperationException ("The initialization function attempted to reference Value recursively");
-
-                       return myWrapper.Getter ();
-               }
-               
-               bool IsInitializedThreadLocal ()
-               {
-                       DataSlotWrapper myWrapper = GetWrapper ();
-
-                       return myWrapper.Init;
-               }
-
-               DataSlotWrapper GetWrapper ()
-               {
-                       DataSlotWrapper myWrapper = (DataSlotWrapper)Thread.GetData (localStore);
-                       if (myWrapper == null) {
-                               myWrapper = DataSlotCreator ();
-                               Thread.SetData (localStore, myWrapper);
-                       }
-
-                       return myWrapper;
-               }
-
-               void ThrowIfNeeded ()
-               {
-                       if (cachedException != null)
-                               throw cachedException;
-               }
-
-               DataSlotWrapper DataSlotCreator ()
-               {
-                       DataSlotWrapper wrapper = new DataSlotWrapper ();
-                       Func<T> valSelector = valueFactory;
-       
-                       wrapper.Getter = delegate {
-                               wrapper.Creating = true;
-                               try {
-                                       T val = valSelector ();
-                                       wrapper.Creating = false;
-                                       wrapper.Init = true;
-                                       wrapper.Getter = () => val;
-                                       return val;
-                               } catch (Exception e) {
-                                       cachedException = e;
-                                       throw e;
-                               }
-                       };
-                       
-                       return wrapper;
-               }
        }
 }
 #endif
index be385cd1b81f32675d9a2d684b9973cd45b53dfa..c96f1d8354b235117e81d34bfad8885f3fd88fa0 100644 (file)
@@ -116,6 +116,54 @@ namespace MonoTests.System.Threading
                        Assert.AreEqual (default (object), local2.Value);
                }
 
+               [Test, ExpectedException (typeof (ObjectDisposedException))]
+               public void DisposedOnValueTest ()
+               {
+                       // Reading Value after Dispose () is expected to throw.
+                       ThreadLocal<int> disposed = new ThreadLocal<int> ();
+                       disposed.Dispose ();
+                       int ignored = disposed.Value;
+               }
+
+               [Test, ExpectedException (typeof (ObjectDisposedException))]
+               public void DisposedOnIsValueCreatedTest ()
+               {
+                       // Reading IsValueCreated after Dispose () is expected to throw.
+                       ThreadLocal<int> disposed = new ThreadLocal<int> ();
+                       disposed.Dispose ();
+                       bool ignored = disposed.IsValueCreated;
+               }
+
+               [Test]
+               public void PerThreadException ()
+               {
+                       // The factory returns 43 on its first call and throws on every later one.
+                       int invocations = 0;
+                       threadLocal = new ThreadLocal<int> (() => {
+                                       if (invocations == 1)
+                                               throw new ApplicationException ("foo");
+                                       Interlocked.Increment (ref invocations);
+                                       return 43;
+                               });
+
+                       Exception caught = null;
+                       bool createdOnWorker = false;
+
+                       // First call happens on the test thread and succeeds.
+                       int mainValue = threadLocal.Value;
+                       Assert.AreEqual (43, mainValue, "#3");
+
+                       Thread worker = new Thread ((object state) => {
+                               try {
+                                       int workerValue = threadLocal.Value;
+                               } catch (Exception e) {
+                                       caught = e;
+                               }
+                               // should be false and not throw
+                               createdOnWorker = threadLocal.IsValueCreated;
+                       });
+                       worker.Start ();
+                       worker.Join ();
+
+                       Assert.AreEqual (false, createdOnWorker, "#4");
+                       Assert.IsNotNull (caught, "#5");
+                       Assert.IsInstanceOfType (typeof (ApplicationException), caught, "#6");
+               }
+
                void AssertThreadLocal ()
                {
                        Assert.IsFalse (threadLocal.IsValueCreated, "#1");
index 815f605c4457d321b4fcda6e46a51c352c300029..f2de04df0be5047f853a6d5f9f7badce96afd351 100644 (file)
@@ -207,6 +207,13 @@ typedef struct _MonoThunkFreeList {
 
 typedef struct _MonoJitCodeHash MonoJitCodeHash;
 
+/* One entry of the per-domain list (domain->tlsrec_list) of slots handed out by mono_thread_alloc_tls () */
+typedef struct _MonoTlsDataRecord MonoTlsDataRecord;
+struct _MonoTlsDataRecord {
+       MonoTlsDataRecord *next; /* next record in the singly linked list, NULL at the end */
+       guint32 tls_offset; /* offset returned by mono_alloc_special_static_data () */
+       guint32 size; /* mono_type_size () of the stored type; passed back when the slot is freed */
+};
+
 struct _MonoDomain {
        /*
         * This lock must never be taken before the loader lock,
@@ -277,6 +284,7 @@ struct _MonoDomain {
        MonoMethod         *private_invoke_method;
        /* Used to store offsets of thread and context static fields */
        GHashTable         *special_static_fields;
+       MonoTlsDataRecord  *tlsrec_list;
        /* 
         * This must be a GHashTable, since these objects can't be finalized
         * if the hashtable contains a GC visible reference to them.
index ab57fee5322e70fb164fc8106865db4d4ef95ff2..8e001df10eb8cf58678f5dd8d14d6f1ec05ced1b 100644 (file)
@@ -1955,6 +1955,11 @@ mono_domain_free (MonoDomain *domain, gboolean force)
        mono_g_hash_table_destroy (domain->env);
        domain->env = NULL;
 
+       if (domain->tlsrec_list) {
+               mono_thread_destroy_domain_tls (domain);
+               domain->tlsrec_list = NULL;
+       }
+
        mono_reflection_cleanup_domain (domain);
 
        if (domain->type_hash) {
index 9b894cbc2025ac0a05e4bdbc2138d06c52100c06..2db72e8f8ca7a4b9b796c41cbbbf53c33f0804b8 100644 (file)
@@ -860,11 +860,13 @@ ICALL(SEMA_3, "ReleaseSemaphore_internal(intptr,int,bool&)", ves_icall_System_Th
 
 ICALL_TYPE(THREAD, "System.Threading.Thread", THREAD_1)
 ICALL(THREAD_1, "Abort_internal(System.Threading.InternalThread,object)", ves_icall_System_Threading_Thread_Abort)
+ICALL(THREAD_1aa, "AllocTlsData", mono_thread_alloc_tls)
 ICALL(THREAD_1a, "ByteArrayToCurrentDomain(byte[])", ves_icall_System_Threading_Thread_ByteArrayToCurrentDomain)
 ICALL(THREAD_1b, "ByteArrayToRootDomain(byte[])", ves_icall_System_Threading_Thread_ByteArrayToRootDomain)
 ICALL(THREAD_2, "ClrState(System.Threading.InternalThread,System.Threading.ThreadState)", ves_icall_System_Threading_Thread_ClrState)
 ICALL(THREAD_2a, "ConstructInternalThread", ves_icall_System_Threading_Thread_ConstructInternalThread)
 ICALL(THREAD_3, "CurrentInternalThread_internal", mono_thread_internal_current)
+ICALL(THREAD_3a, "DestroyTlsData", mono_thread_destroy_tls)
 ICALL(THREAD_4, "FreeLocalSlotValues", mono_thread_free_local_slot_values)
 ICALL(THREAD_55, "GetAbortExceptionState", ves_icall_System_Threading_Thread_GetAbortExceptionState)
 ICALL(THREAD_7, "GetDomainID", ves_icall_System_Threading_Thread_GetDomainID)
index ffb7ba936b7e027c98de94e5f1dfaf53887806b3..46ebab02775d7889b5c63895ceebcd918c94a2b1 100644 (file)
@@ -1516,6 +1516,9 @@ mono_method_clear_object (MonoDomain *domain, MonoMethod *method) MONO_INTERNAL;
 void
 mono_class_compute_gc_descriptor (MonoClass *class) MONO_INTERNAL;
 
+gsize*
+mono_class_compute_bitmap (MonoClass *class, gsize *bitmap, int size, int offset, int *max_set, gboolean static_fields) MONO_INTERNAL;
+
 MonoObject*
 mono_object_xdomain_representation (MonoObject *obj, MonoDomain *target_domain, MonoObject **exc) MONO_INTERNAL;
 
index a865653f36b9138ef115eae1b44d4ff4a782230d..40999a618ba4da0d3a742a673cd2930fab765386 100644 (file)
@@ -756,6 +756,17 @@ compute_class_bitmap (MonoClass *class, gsize *bitmap, int size, int offset, int
        return bitmap;
 }
 
+/**
+ * mono_class_compute_bitmap:
+ *
+ * Mono internal function to compute a bitmap of reference fields in a class.
+ * Non-static entry point that forwards every argument unchanged to
+ * compute_class_bitmap () and returns its result.
+ */
+gsize*
+mono_class_compute_bitmap (MonoClass *class, gsize *bitmap, int size, int offset, int *max_set, gboolean static_fields)
+{
+       return compute_class_bitmap (class, bitmap, size, offset, max_set, static_fields);
+}
+
 #if 0
 /* 
  * similar to the above, but sets the bits in the bitmap for any non-ref field
index 7ca33bc7a662c6f9e9c189268a370125fe31d4ca..13c4ab4ff3304264826c741ab50142e70262c2a9 100644 (file)
@@ -161,6 +161,10 @@ gboolean mono_thread_internal_has_appdomain_ref (MonoInternalThread *thread, Mon
 void mono_thread_internal_reset_abort (MonoInternalThread *thread) MONO_INTERNAL;
 
 void mono_alloc_special_static_data_free (GHashTable *special_static_fields) MONO_INTERNAL;
+void mono_special_static_data_free_slot (guint32 offset, guint32 size) MONO_INTERNAL;
+uint32_t mono_thread_alloc_tls   (MonoReflectionType *type) MONO_INTERNAL;
+void     mono_thread_destroy_tls (uint32_t tls_offset) MONO_INTERNAL;
+void     mono_thread_destroy_domain_tls (MonoDomain *domain) MONO_INTERNAL;
 void mono_thread_free_local_slot_values (int slot, MonoBoolean thread_local) MONO_INTERNAL;
 void mono_thread_current_check_pending_interrupt (void) MONO_INTERNAL;
 void mono_thread_get_stack_bounds (guint8 **staddr, size_t *stsize) MONO_INTERNAL;
index cc01d8f23e9bba9e413e445d5caba642b9b424e9..c090393736cba337f1dda46f192960589b948786 100644 (file)
@@ -3780,14 +3780,9 @@ free_thread_static_data_helper (gpointer key, gpointer value, gpointer user)
 }
 
 static void
-do_free_special (gpointer key, gpointer value, gpointer data)
+do_free_special_slot (guint32 offset, guint32 size)
 {
-       MonoClassField *field = key;
-       guint32 offset = GPOINTER_TO_UINT (value);
        guint32 static_type = (offset & 0x80000000);
-       gint32 align;
-       guint32 size;
-       size = mono_type_size (field->type, &align);
        /*g_print ("free %s , size: %d, offset: %x\n", field->name, size, offset);*/
        if (static_type == 0) {
                TlsOffsetSize data;
@@ -3812,6 +3807,17 @@ do_free_special (gpointer key, gpointer value, gpointer data)
        }
 }
 
+/*
+ * Hash table callback: key is the MonoClassField, value its special static offset.
+ * Computes the field's size and hands both to do_free_special_slot ().
+ */
+static void
+do_free_special (gpointer key, gpointer value, gpointer data)
+{
+       gint32 align;
+       MonoClassField *field = key;
+       guint32 slot_offset = GPOINTER_TO_UINT (value);
+       guint32 slot_size = mono_type_size (field->type, &align);
+
+       do_free_special_slot (slot_offset, slot_size);
+}
+
 void
 mono_alloc_special_static_data_free (GHashTable *special_static_fields)
 {
@@ -3820,6 +3826,85 @@ mono_alloc_special_static_data_free (GHashTable *special_static_fields)
        mono_threads_unlock ();
 }
 
+/*
+ * Frees a single special static slot: runs do_free_special_slot () for
+ * (offset, size) while holding the threads lock.
+ */
+void
+mono_special_static_data_free_slot (guint32 offset, guint32 size)
+{
+       mono_threads_lock ();
+       do_free_special_slot (offset, size);
+       mono_threads_unlock ();
+}
+
+/*
+ * Reserves space in the thread local area for one instance of the struct
+ * described by type and returns the offset of that space.
+ * Each reservation is recorded at the head of domain->tlsrec_list.
+ */
+uint32_t
+mono_thread_alloc_tls (MonoReflectionType *type)
+{
+       MonoDomain *domain = mono_domain_get ();
+       MonoClass *klass = mono_class_from_mono_type (type->type);
+       MonoTlsDataRecord *record;
+       gsize inline_bitmap [4] = {0};
+       gsize *ref_bitmap;
+       int highest_bit = 0;
+       gint32 align;
+       guint32 size;
+       uint32_t offset;
+
+       /* TlsDatum is a struct, so we subtract the object header size offset */
+       ref_bitmap = mono_class_compute_bitmap (klass, inline_bitmap, sizeof (inline_bitmap) * 8, - (int)(sizeof (MonoObject) / sizeof (gpointer)), &highest_bit, FALSE);
+       size = mono_type_size (type->type, &align);
+       offset = mono_alloc_special_static_data (SPECIAL_STATIC_THREAD, size, align, ref_bitmap, highest_bit);
+       /* only free the bitmap when it is not the on-stack buffer */
+       if (ref_bitmap != inline_bitmap)
+               g_free (ref_bitmap);
+
+       record = g_new0 (MonoTlsDataRecord, 1);
+       record->size = size;
+       record->tls_offset = offset;
+
+       /* push the record at the head of the domain's list */
+       mono_domain_lock (domain);
+       record->next = domain->tlsrec_list;
+       domain->tlsrec_list = record;
+       mono_domain_unlock (domain);
+
+       return offset;
+}
+
+/*
+ * Unlinks the record for tls_offset from domain->tlsrec_list and frees the
+ * matching thread-local slot. Does nothing when domain has no record for
+ * that offset.
+ */
+static void
+destroy_tls_in_domain (MonoDomain *domain, uint32_t tls_offset)
+{
+       MonoTlsDataRecord *prev = NULL;
+       MonoTlsDataRecord *cur;
+       guint32 size = 0;
+
+       mono_domain_lock (domain);
+       cur = domain->tlsrec_list;
+       while (cur) {
+               if (cur->tls_offset == tls_offset) {
+                       if (prev)
+                               prev->next = cur->next;
+                       else
+                               domain->tlsrec_list = cur->next;
+                       size = cur->size;
+                       g_free (cur);
+                       break;
+               }
+               prev = cur;
+               cur = cur->next;
+       }
+       mono_domain_unlock (domain);
+       /* size is still 0 when no record matched; the slot is released after dropping the domain lock */
+       if (size)
+               mono_special_static_data_free_slot (tls_offset, size);
+}
+
+/*
+ * Frees the slot at tls_offset, looked up in the current domain's record list.
+ */
+void
+mono_thread_destroy_tls (uint32_t tls_offset)
+{
+       destroy_tls_in_domain (mono_domain_get (), tls_offset);
+}
+
+/*
+ * This is just to ensure cleanup: the finalizers should have taken care, so this is not perf-critical.
+ * Records are looked up in the domain passed in, not in mono_domain_get ():
+ * if the two differed, the head record would never be unlinked and this loop
+ * would not terminate.
+ */
+void
+mono_thread_destroy_domain_tls (MonoDomain *domain)
+{
+       while (domain->tlsrec_list)
+               destroy_tls_in_domain (domain, domain->tlsrec_list->tls_offset);
+}
+
 static MonoClassField *local_slots = NULL;
 
 typedef struct {
index 4c8d4e945e07f9b1fec505c8238c95f5cf77a5dd..c782f257ff84f5ee67974355a5b6b7998ae1ee9a 100644 (file)
@@ -5594,6 +5594,85 @@ is_supported_tail_call (MonoCompile *cfg, MonoMethod *method, MonoMethod *cmetho
        return supported_tail_call;
 }
 
+/* Tells whether field is the tlsdata field of corlib's ThreadLocal<T>.
+ * The JIT redirects ldflda on that field to the thread local value selected by the
+ * tls_offset field; every other kind of access to the field causes an assert.
+ */
+static gboolean
+is_magic_tls_access (MonoClassField *field)
+{
+       /* checks run in order: field name, declaring class name, declaring image */
+       return strcmp (field->name, "tlsdata") == 0 &&
+               strcmp (field->parent->name, "ThreadLocal`1") == 0 &&
+               field->parent->image == mono_defaults.corlib;
+}
+
+/* emits the code needed to access a managed tls var (like ThreadStatic)
+ * with the value of the tls offset in offset_reg. thread_ins represents the MonoInternalThread
+ * pointer for the current thread.
+ * Returns the MonoInst* representing the address of the tls var.
+ */
+static MonoInst*
+emit_managed_static_data_access (MonoCompile *cfg, MonoInst *thread_ins, int offset_reg)
+{
+       MonoInst *addr;
+       int static_data_reg, array_reg, dreg;
+       int offset2_reg, idx_reg;
+       // inlined access to the tls data
+       // idx = (offset >> 24) - 1;
+       // return ((char*) thread->static_data [idx]) + (offset & 0xffffff);
+       static_data_reg = alloc_ireg (cfg);
+       /* static_data_reg = thread->static_data */
+       MONO_EMIT_NEW_LOAD_MEMBASE (cfg, static_data_reg, thread_ins->dreg, G_STRUCT_OFFSET (MonoInternalThread, static_data));
+       idx_reg = alloc_ireg (cfg);
+       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHR_IMM, idx_reg, offset_reg, 24);
+       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISUB_IMM, idx_reg, idx_reg, 1);
+       /* scale the index by the pointer size: shift by 3 for 8-byte pointers, by 2 otherwise */
+       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ISHL_IMM, idx_reg, idx_reg, sizeof (gpointer) == 8 ? 3 : 2);
+       MONO_EMIT_NEW_BIALU (cfg, OP_PADD, static_data_reg, static_data_reg, idx_reg);
+       array_reg = alloc_ireg (cfg);
+       /* array_reg = thread->static_data [idx] */
+       MONO_EMIT_NEW_LOAD_MEMBASE (cfg, array_reg, static_data_reg, 0);
+       offset2_reg = alloc_ireg (cfg);
+       /* the low 24 bits of the offset are added to the array pointer below */
+       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IAND_IMM, offset2_reg, offset_reg, 0xffffff);
+       dreg = alloc_ireg (cfg);
+       EMIT_NEW_BIALU (cfg, addr, OP_PADD, dreg, array_reg, offset2_reg);
+       return addr;
+}
+
+/*
+ * redirect access to the tlsdata field to the tls var given by the tls_offset field.
+ * this address is cached per-method in cached_tls_addr.
+ * thread_local is the instruction whose dreg holds the ThreadLocal<T> instance
+ * from which tls_offset is loaded.
+ * Returns a load of the temporary that holds the computed address.
+ */
+static MonoInst*
+create_magic_tls_access (MonoCompile *cfg, MonoClassField *tls_field, MonoInst **cached_tls_addr, MonoInst *thread_local)
+{
+       MonoInst *load, *addr, *temp, *store, *thread_ins;
+       MonoClassField *offset_field;
+
+       /* an earlier call for this method already stored the address: just reload it */
+       if (*cached_tls_addr) {
+               EMIT_NEW_TEMPLOAD (cfg, addr, (*cached_tls_addr)->inst_c0);
+               return addr;
+       }
+       thread_ins = mono_get_thread_intrinsic (cfg);
+       offset_field = mono_class_get_field_from_name (tls_field->parent, "tls_offset");
+
+       /* load the tls_offset field of the ThreadLocal<T> instance */
+       EMIT_NEW_LOAD_MEMBASE_TYPE (cfg, load, offset_field->type, thread_local->dreg, offset_field->offset);
+       if (thread_ins) {
+               MONO_ADD_INS (cfg->cbb, thread_ins);
+       } else {
+               /* no intrinsic available: call Thread.CurrentInternalThread_internal () instead */
+               MonoMethod *thread_method;
+               thread_method = mono_class_get_method_from_name (mono_get_thread_class(), "CurrentInternalThread_internal", 0);
+               thread_ins = mono_emit_method_call (cfg, thread_method, NULL, NULL);
+       }
+       addr = emit_managed_static_data_access (cfg, thread_ins, load->dreg);
+       addr->klass = mono_class_from_mono_type (tls_field->type);
+       addr->type = STACK_MP;
+       /* keep the address in a new local and remember that local for later calls */
+       *cached_tls_addr = temp = mono_compile_create_var (cfg, type_from_stack_type (addr), OP_LOCAL);
+       EMIT_NEW_TEMPSTORE (cfg, store, temp->inst_c0, addr);
+
+       EMIT_NEW_TEMPLOAD (cfg, addr, temp->inst_c0);
+       return addr;
+}
+
 /*
  * mono_method_to_ir:
  *
@@ -5633,6 +5712,7 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b
        int context_used;
        gboolean init_locals, seq_points, skip_dead_blocks;
        gboolean disable_inline;
+       MonoInst *cached_tls_addr = NULL;
 
        disable_inline = is_jit_optimizer_disabled (method);
 
@@ -8497,6 +8577,8 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b
                                FIELD_ACCESS_FAILURE;
                        mono_class_init (klass);
 
+                       if (*ip != CEE_LDFLDA && is_magic_tls_access (field))
+                               UNVERIFIED;
                        /* XXX this is technically required but, so far (SL2), no [SecurityCritical] types (not many exists) have
                           any visible *instance* field  (in fact there's a single case for a static field in Marshal) XXX
                        if (mono_security_get_mode () == MONO_SECURITY_MODE_CORE_CLR)
@@ -8598,17 +8680,22 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b
                                }
 
                                if (*ip == CEE_LDFLDA) {
-                                       if (sp [0]->type == STACK_OBJ) {
-                                               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, sp [0]->dreg, 0);
-                                               MONO_EMIT_NEW_COND_EXC (cfg, EQ, "NullReferenceException");
-                                       }
+                                       if (is_magic_tls_access (field)) {
+                                               ins = sp [0];
+                                               *sp++ = create_magic_tls_access (cfg, field, &cached_tls_addr, ins);
+                                       } else {
+                                               if (sp [0]->type == STACK_OBJ) {
+                                                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, sp [0]->dreg, 0);
+                                                       MONO_EMIT_NEW_COND_EXC (cfg, EQ, "NullReferenceException");
+                                               }
 
-                                       dreg = alloc_ireg_mp (cfg);
+                                               dreg = alloc_ireg_mp (cfg);
 
-                                       EMIT_NEW_BIALU_IMM (cfg, ins, OP_PADD_IMM, dreg, sp [0]->dreg, foffset);
-                                       ins->klass = mono_class_from_mono_type (field->type);
-                                       ins->type = STACK_MP;
-                                       *sp++ = ins;
+                                               EMIT_NEW_BIALU_IMM (cfg, ins, OP_PADD_IMM, dreg, sp [0]->dreg, foffset);
+                                               ins->klass = mono_class_from_mono_type (field->type);
+                                               ins->type = STACK_MP;
+                                               *sp++ = ins;
+                                       }
                                } else {
                                        MonoInst *load;