Mon Jan 30 12:51:10 CET 2006 Paolo Molaro <lupus@ximian.com>
[mono.git] / mono / mini / mini-ppc.c
index 52ebbe334e6bbc549758e391d9a13ea6b2edae2c..1435de5ee9864ae92fbbcc3fce236e93917fc347 100644 (file)
 #include "cpu-g4.h"
 #include "trace.h"
 
+enum {
+       TLS_MODE_DETECT, /* initial value of tls_mode: setup_tls_access () still has to probe */
+       TLS_MODE_FAILED, /* probing failed or MONO_NO_TLS is set: no inline TLS access */
+       TLS_MODE_LTHREADS, /* linuxthreads layout, read with emit_linuxthreads_tls */
+       TLS_MODE_NPTL, /* NOTE(review): never assigned or handled in the code visible here */
+       TLS_MODE_DARWIN_G4, /* has an emitter, but detection currently reports FAILED for it */
+       TLS_MODE_DARWIN_G5 /* Darwin layout read through mfspr 104, see emit_darwing5_tls */
+};
+
 int mono_exc_esp_offset = 0;
+static int tls_mode = TLS_MODE_DETECT; /* one of the TLS_MODE_* values, set by setup_tls_access () */
+static int lmf_pthread_key = -1; /* pthread key for mono_jit_tls_id; -1 makes the prolog call mono_get_lmf_addr instead */
+static int monothread_key = -1; /* pthread key of the thread TLS slot; -1 disables the thread intrinsic */
+static int monodomain_key = -1; /* pthread key of the domain TLS slot; -1 disables the domain intrinsic */
+
+/*
+ * offsets_from_pthread_key:
+ * Split a pthread key into the two byte offsets used by the two-level
+ * linuxthreads lookup. Keys are grouped 32 to a row.
+ *
+ * Returns the offset of the row pointer, 284 + (key / 32) * sizeof (gpointer),
+ * which emit_linuxthreads_tls applies to r2. The offset of the slot inside
+ * that row, (key % 32) * sizeof (gpointer), is stored in *offset2.
+ */
+static int
+offsets_from_pthread_key (guint32 key, int *offset2)
+{
+       const int row = key / 32;
+       const int slot = key % 32;
+
+       *offset2 = slot * sizeof (gpointer);
+       return 284 + row * sizeof (gpointer);
+}
+
+#define emit_linuxthreads_tls(code,dreg,key) do {\
+               int off1, off2; \
+               off1 = offsets_from_pthread_key ((key), &off2); \
+               ppc_lwz ((code), (dreg), off1, ppc_r2); \
+               ppc_lwz ((code), (dreg), off2, (dreg)); \
+       } while (0);
+
+#define emit_darwing5_tls(code,dreg,key) do {\
+               int off1 = 0x48 + key * sizeof (gpointer);      \
+               ppc_mfspr ((code), (dreg), 104);        \
+               ppc_lwz ((code), (dreg), off1, (dreg)); \
+       } while (0);
+
+/* FIXME: ensure the sc call preserves all but r3 */
+#define emit_darwing4_tls(code,dreg,key) do {\
+               int off1 = 0x48 + key * sizeof (gpointer);      \
+               if ((dreg) != ppc_r3) ppc_mr ((code), ppc_r11, ppc_r3); \
+               ppc_li ((code), ppc_r0, 0x7FF2);        \
+               ppc_sc ((code));        \
+               ppc_lwz ((code), (dreg), off1, ppc_r3); \
+               if ((dreg) != ppc_r3) ppc_mr ((code), ppc_r3, ppc_r11); \
+       } while (0);
+
+#define emit_tls_access(code,dreg,key) do {    \
+               switch (tls_mode) {     \
+               case TLS_MODE_LTHREADS: emit_linuxthreads_tls(code,dreg,key); break;    \
+               case TLS_MODE_DARWIN_G5: emit_darwing5_tls(code,dreg,key); break;       \
+               case TLS_MODE_DARWIN_G4: emit_darwing4_tls(code,dreg,key); break;       \
+               default: g_assert_not_reached ();       \
+               }       \
+       } while (0)
 
 const char*
 mono_arch_regname (int reg) {
@@ -160,7 +214,7 @@ mono_arch_cpu_optimizazions (guint32 *exclude_mask)
        guint32 opts = 0;
 
        /* no ppc-specific optimizations yet */
-       *exclude_mask = MONO_OPT_INLINE;
+       *exclude_mask = 0;
        return opts;
 }
 
@@ -175,6 +229,7 @@ is_regsize_var (MonoType *t) {
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_PTR:
+       case MONO_TYPE_FNPTR:
                return TRUE;
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_STRING:
@@ -259,8 +314,15 @@ mono_arch_flush_icache (guint8 *code, gint size)
        guint8 *p;
 
        p = code;
-       for (i = 0; i < size; i += MIN_CACHE_LINE, p += MIN_CACHE_LINE) {
-               asm ("dcbst 0,%0;" : : "r"(p) : "memory");
+       /* use dcbf for smp support, later optimize for UP, see pem._64bit.d20030611.pdf page 211 */
+       if (1) {
+               for (i = 0; i < size; i += MIN_CACHE_LINE, p += MIN_CACHE_LINE) {
+                       asm ("dcbf 0,%0;" : : "r"(p) : "memory");
+               }
+       } else {
+               for (i = 0; i < size; i += MIN_CACHE_LINE, p += MIN_CACHE_LINE) {
+                       asm ("dcbst 0,%0;" : : "r"(p) : "memory");
+               }
        }
        asm ("sync");
        p = code;
@@ -279,7 +341,7 @@ mono_arch_flush_icache (guint8 *code, gint size)
 #define FP_ALSO_IN_REG(s) s
 #else
 #define ALWAYS_ON_STACK(s)
-#define FP_ALSO_IN_REG(s) s
+#define FP_ALSO_IN_REG(s)
 #define ALIGN_DOUBLES
 #endif
 
@@ -345,6 +407,25 @@ add_general (guint *gr, guint *stack_size, ArgInfo *ainfo, gboolean simple)
        (*gr) ++;
 }
 
+#if __APPLE__
+/*
+ * has_only_a_r4_field:
+ * Report whether the first non-static field of klass is a by-value
+ * MONO_TYPE_R4. The caller has already checked that the value is 4 bytes
+ * big, so only that first instance field is inspected. A class without
+ * instance fields yields FALSE.
+ */
+static gboolean
+has_only_a_r4_field (MonoClass *klass)
+{
+       gpointer iter = NULL;
+       MonoClassField *field;
+
+       while ((field = mono_class_get_fields (klass, &iter)) != NULL) {
+               /* static fields take no part in the instance layout */
+               if (field->type->attrs & FIELD_ATTRIBUTE_STATIC)
+                       continue;
+               /* the first instance field settles the answer */
+               return !field->type->byref && field->type->type == MONO_TYPE_R4;
+       }
+       return FALSE;
+}
+#endif
+
 static CallInfo*
 calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
 {
@@ -428,6 +509,27 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                            size = mono_class_native_size (klass, NULL);
                        else
                            size = mono_class_value_size (klass, NULL);
+#if __APPLE__
+                       if (size == 4 && has_only_a_r4_field (klass)) {
+                               cinfo->args [n].size = 4;
+
+                               /* It was 7, now it is 8 in LinuxPPC */
+                               if (fr <= PPC_LAST_FPARG_REG) {
+                                       cinfo->args [n].regtype = RegTypeFP;
+                                       cinfo->args [n].reg = fr;
+                                       fr ++;
+                                       FP_ALSO_IN_REG (gr ++);
+                                       ALWAYS_ON_STACK (stack_size += 4);
+                               } else {
+                                       cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
+                                       cinfo->args [n].regtype = RegTypeBase;
+                                       cinfo->args [n].reg = ppc_sp; /* in the caller*/
+                                       stack_size += 4;
+                               }
+                               n++;
+                               break;
+                       }
+#endif
                        DEBUG(printf ("load %d bytes struct\n",
                                      mono_class_native_size (sig->params [i]->data.klass, NULL)));
 #if PPC_PASS_STRUCTS_BY_VALUE
@@ -608,6 +710,12 @@ mono_arch_allocate_vars (MonoCompile *m)
         */
        if (m->flags & MONO_CFG_HAS_VARARGS)
                m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
+       /* gtk-sharp and other broken code will dllimport vararg functions even with
+        * non-varargs signatures. Since there is little hope people will get this right
+        * we assume they won't.
+        */
+       if (m->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE)
+               m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
 
        header = mono_method_get_header (m->method);
 
@@ -627,7 +735,7 @@ mono_arch_allocate_vars (MonoCompile *m)
                m->used_int_regs |= 1 << frame_reg;
        }
 
-       sig = m->method->signature;
+       sig = mono_method_signature (m->method);
        
        offset = 0;
        curinst = 0;
@@ -881,18 +989,6 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
  * Allow tracing to work with this interface (with an optional argument)
  */
 
-/*
- * This may be needed on some archs or for debugging support.
- */
-void
-mono_arch_instrument_mem_needs (MonoMethod *method, int *stack, int *code)
-{
-       /* no stack room needed now (may be needed for FASTCALL-trace support) */
-       *stack = 0;
-       /* split prolog-epilog requirements? */
-       *code = 50; /* max bytes needed: check this number */
-}
-
 void*
 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
 {
@@ -919,12 +1015,20 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena
 {
        guchar *code = p;
        int save_mode = SAVE_NONE;
+       int offset;
        MonoMethod *method = cfg->method;
-       int rtype = mono_type_get_underlying_type (method->signature->ret)->type;
+       int rtype = mono_type_get_underlying_type (mono_method_signature (method)->ret)->type;
        int save_offset = PPC_STACK_PARAM_OFFSET + cfg->param_area;
        save_offset += 15;
        save_offset &= ~15;
        
+       offset = code - cfg->native_code;
+       /* we need about 16 instructions */
+       if (offset > (cfg->code_size - 16 * 4)) {
+               cfg->code_size *= 2;
+               cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+               code = cfg->native_code + offset;
+       }
 handle_enum:
        switch (rtype) {
        case MONO_TYPE_VOID:
@@ -1018,7 +1122,7 @@ handle_enum:
  */
 typedef struct {
        MonoBasicBlock *bb;
-       void *ip;
+       guint32 ip_offset;
        guint16 b0_cond;
        guint16 b1_cond;
 } MonoOvfJump;
@@ -1039,7 +1143,7 @@ if (ins->flags & MONO_INST_BRLABEL) { \
                if (!ppc_is_imm16 (br_disp + 1024) || ! ppc_is_imm16 (ppc_is_imm16 (br_disp - 1024))) { \
                        MonoOvfJump *ovfj = mono_mempool_alloc (cfg->mempool, sizeof (MonoOvfJump));    \
                        ovfj->bb = ins->inst_true_bb;   \
-                       ovfj->ip = NULL;        \
+                       ovfj->ip_offset = 0;    \
                        ovfj->b0_cond = (b0);   \
                        ovfj->b1_cond = (b1);   \
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB_OVF, ovfj); \
@@ -1064,7 +1168,7 @@ if (ins->flags & MONO_INST_BRLABEL) { \
                if (!ppc_is_imm16 (br_disp + 1024) || ! ppc_is_imm16 (ppc_is_imm16 (br_disp - 1024))) { \
                        MonoOvfJump *ovfj = mono_mempool_alloc (cfg->mempool, sizeof (MonoOvfJump));    \
                        ovfj->bb = NULL;        \
-                       ovfj->ip = code;        \
+                       ovfj->ip_offset = code - cfg->native_code;      \
                        ovfj->b0_cond = (b0);   \
                        ovfj->b1_cond = (b1);   \
                        /* FIXME: test this code */     \
@@ -1074,7 +1178,7 @@ if (ins->flags & MONO_INST_BRLABEL) { \
                } else {        \
                        mono_add_patch_info (cfg, code - cfg->native_code,   \
                                    MONO_PATCH_INFO_EXC, exc_name);  \
-                       ppc_bc (code, (b0), (b1), 0);   \
+                       ppc_bcl (code, (b0), (b1), 0);  \
                }       \
        } while (0); 
 
@@ -2274,6 +2378,25 @@ ppc_patch (guchar *code, guchar *target)
                        ins |= diff;
                }
                *(guint32*)code = ins;
+               return;
+       }
+
+       if (prim == 15 || ins == 0x4e800021) {
+               guint32 *seq;
+               /* the trampoline code will try to patch the blrl */
+               if (ins == 0x4e800021) {
+                       code -= 12;
+               }
+               /* this is the lis/ori/mtlr/blrl sequence */
+               seq = (guint32*)code;
+               g_assert ((seq [0] >> 26) == 15);
+               g_assert ((seq [1] >> 26) == 24);
+               g_assert ((seq [2] >> 26) == 31);
+               g_assert (seq [3] == 0x4e800021);
+               /* FIXME: make this thread safe */
+               ppc_lis (code, ppc_r0, (guint32)(target) >> 16);
+               ppc_ori (code, ppc_r0, ppc_r0, (guint32)(target) & 0xffff);
+               mono_arch_flush_icache (code - 8, 8);
        } else {
                g_assert_not_reached ();
        }
@@ -2328,6 +2451,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                mono_debug_record_line_number (cfg, ins, offset);
 
                switch (ins->opcode) {
+               case OP_TLS_GET:
+                       emit_tls_access (code, ins->dreg, ins->inst_offset);
+                       break;
                case OP_BIGMUL:
                        ppc_mullw (code, ppc_r4, ins->sreg1, ins->sreg2);
                        ppc_mulhw (code, ppc_r3, ins->sreg1, ins->sreg2);
@@ -2336,6 +2462,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_mullw (code, ppc_r4, ins->sreg1, ins->sreg2);
                        ppc_mulhwu (code, ppc_r3, ins->sreg1, ins->sreg2);
                        break;
+               case OP_MEMORY_BARRIER:
+                       ppc_sync (code);
+                       break;
                case OP_STOREI1_MEMBASE_IMM:
                        ppc_li (code, ppc_r0, ins->inst_imm);
                        if (ppc_is_imm16 (ins->inst_offset)) {
@@ -2875,7 +3004,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD, call->method);
                        else
                                mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_ABS, call->fptr);
-                       ppc_bl (code, 0);
+                       if (cfg->method->dynamic) {
+                               ppc_lis (code, ppc_r0, 0);
+                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_mtlr (code, ppc_r0);
+                               ppc_blrl (code);
+                       } else {
+                               ppc_bl (code, 0);
+                       }
                        break;
                case OP_FCALL_REG:
                case OP_LCALL_REG:
@@ -2898,7 +3034,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert_not_reached ();
                        break;
                case OP_LOCALLOC: {
-                       guint32 * zero_loop_jump, zero_loop_start;
+                       guint32 * zero_loop_jump, zero_loop_start;
                        /* keep alignment */
                        int alloca_waste = PPC_STACK_PARAM_OFFSET + cfg->param_area + 31;
                        int area_offset = alloca_waste;
@@ -2937,7 +3073,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_mr (code, ppc_r3, ins->sreg1);
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                             (gpointer)"mono_arch_throw_exception");
-                       ppc_bl (code, 0);
+                       if (cfg->method->dynamic) {
+                               ppc_lis (code, ppc_r0, 0);
+                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_mtlr (code, ppc_r0);
+                               ppc_blrl (code);
+                       } else {
+                               ppc_bl (code, 0);
+                       }
                        break;
                }
                case OP_RETHROW: {
@@ -2945,7 +3088,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_mr (code, ppc_r3, ins->sreg1);
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                             (gpointer)"mono_arch_rethrow_exception");
-                       ppc_bl (code, 0);
+                       if (cfg->method->dynamic) {
+                               ppc_lis (code, ppc_r0, 0);
+                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_mtlr (code, ppc_r0);
+                               ppc_blrl (code);
+                       } else {
+                               ppc_bl (code, 0);
+                       }
                        break;
                }
                case OP_START_HANDLER:
@@ -3090,6 +3240,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                case OP_STORER4_MEMBASE_REG:
+                       ppc_frsp (code, ins->sreg1, ins->sreg1);
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_stfs (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
@@ -3343,15 +3494,15 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
                        *((gpointer *)(ip)) = code + patch_info->data.offset;
                        continue;
                case MONO_PATCH_INFO_SWITCH: {
-                       gpointer *table = (gpointer *)patch_info->data.target;
+                       gpointer *table = (gpointer *)patch_info->data.table->table;
                        int i;
 
                        // FIXME: inspect code to get the register
-                       ppc_load (ip, ppc_r11, patch_info->data.target);
+                       ppc_load (ip, ppc_r11, table);
                        //*((gconstpointer *)(ip + 2)) = patch_info->data.target;
 
-                       for (i = 0; i < patch_info->data.table->table_size; i++) {
-                               table [i] = (int)patch_info->data.table [i] + code;
+                       for (i = 0; i < patch_info->data.table->table_size; i++) { 
+                               table [i] = (int)patch_info->data.table->table [i] + code;
                        }
                        /* we put into the table the absolute address, no need for ppc_patch in this case */
                        continue;
@@ -3378,6 +3529,7 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
                        g_assert_not_reached ();
                        *((gconstpointer *)(ip + 1)) = patch_info->data.name;
                        continue;
+               case MONO_PATCH_INFO_NONE:
                case MONO_PATCH_INFO_BB_OVF:
                case MONO_PATCH_INFO_EXC_OVF:
                        /* everything is dealt with at epilog output time */
@@ -3389,39 +3541,6 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
        }
 }
 
-int
-mono_arch_max_epilog_size (MonoCompile *cfg)
-{
-       int max_epilog_size = 16 + 20*4;
-       MonoJumpInfo *patch_info;
-       
-       if (cfg->method->save_lmf)
-               max_epilog_size += 128;
-       
-       if (mono_jit_trace_calls != NULL)
-               max_epilog_size += 50;
-
-       if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
-               max_epilog_size += 50;
-
-       /* count the number of exception infos */
-     
-       /* 
-        * make sure we have enough space for exceptions
-        * 24 is the simulated call to throw_exception_by_name
-        */
-       for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
-               if (patch_info->type == MONO_PATCH_INFO_EXC)
-                       max_epilog_size += 24;
-               else if (patch_info->type == MONO_PATCH_INFO_BB_OVF)
-                       max_epilog_size += 12;
-               else if (patch_info->type == MONO_PATCH_INFO_EXC_OVF)
-                       max_epilog_size += 12;
-       }
-
-       return max_epilog_size;
-}
-
 /*
  * Stack frame layout:
  * 
@@ -3456,7 +3575,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
                tracing = 1;
 
-       sig = method->signature;
+       sig = mono_method_signature (method);
        cfg->code_size = 256 + sig->param_count * 20;
        code = cfg->native_code = g_malloc (cfg->code_size);
 
@@ -3696,9 +3815,22 @@ register.  Should this case include linux/ppc?
 
        if (method->save_lmf) {
 
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
+               if (lmf_pthread_key != -1) {
+                       emit_tls_access (code, ppc_r3, lmf_pthread_key);
+                       if (G_STRUCT_OFFSET (MonoJitTlsData, lmf))
+                               ppc_addi (code, ppc_r3, ppc_r3, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
+               } else {
+                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                     (gpointer)"mono_get_lmf_addr");
-               ppc_bl (code, 0);
+                       if (cfg->method->dynamic) {
+                               ppc_lis (code, ppc_r0, 0);
+                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_mtlr (code, ppc_r0);
+                               ppc_blrl (code);
+                       } else {
+                               ppc_bl (code, 0);
+                       }
+               }
                /* we build the MonoLMF structure on the stack - see mini-ppc.h */
                /* lmf_offset is the offset from the previous stack pointer,
                 * alloc_size is the total stack space allocated, so the offset
@@ -3740,8 +3872,24 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        MonoJumpInfo *patch_info;
        MonoMethod *method = cfg->method;
        int pos, i;
+       int max_epilog_size = 16 + 20*4;
        guint8 *code;
 
+       if (cfg->method->save_lmf)
+               max_epilog_size += 128;
+       
+       if (mono_jit_trace_calls != NULL)
+               max_epilog_size += 50;
+
+       if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
+               max_epilog_size += 50;
+
+       while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
+               cfg->code_size *= 2;
+               cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+               mono_jit_stats.code_reallocs++;
+       }
+
        /*
         * Keep in sync with CEE_JMP
         */
@@ -3813,6 +3961,73 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        }
        ppc_blr (code);
 
+       cfg->code_len = code - cfg->native_code;
+
+       g_assert (cfg->code_len < cfg->code_size);
+
+}
+
+/*
+ * exception_id_by_name:
+ * Translate the name of an intrinsic exception into its MONO_EXC_* index.
+ * A name that is not in the table is reported through g_error ().
+ * Remove once throw_exception_by_name is eliminated.
+ */
+static int
+exception_id_by_name (const char *name)
+{
+       static const struct {
+               const char *exc_name;
+               int exc_id;
+       } known_excs [] = {
+               { "IndexOutOfRangeException", MONO_EXC_INDEX_OUT_OF_RANGE },
+               { "OverflowException", MONO_EXC_OVERFLOW },
+               { "ArithmeticException", MONO_EXC_ARITHMETIC },
+               { "DivideByZeroException", MONO_EXC_DIVIDE_BY_ZERO },
+               { "InvalidCastException", MONO_EXC_INVALID_CAST },
+               { "NullReferenceException", MONO_EXC_NULL_REF },
+               { "ArrayTypeMismatchException", MONO_EXC_ARRAY_TYPE_MISMATCH }
+       };
+       unsigned int pos;
+
+       for (pos = 0; pos < sizeof (known_excs) / sizeof (known_excs [0]); ++pos) {
+               if (strcmp (name, known_excs [pos].exc_name) == 0)
+                       return known_excs [pos].exc_id;
+       }
+       g_error ("Unknown intrinsic exception %s\n", name);
+       return 0;
+}
+
+void
+mono_arch_emit_exceptions (MonoCompile *cfg)
+{
+       MonoJumpInfo *patch_info;
+       int nthrows, i;
+       guint8 *code;
+       const guint8* exc_throw_pos [MONO_EXC_INTRINS_NUM] = {NULL};
+       guint8 exc_throw_found [MONO_EXC_INTRINS_NUM] = {0};
+       guint32 code_size;
+       int exc_count = 0;
+       int max_epilog_size = 50;
+
+       /* count the number of exception infos */
+     
+       /* 
+        * make sure we have enough space for exceptions
+        * 24 is the simulated call to throw_exception_by_name
+        */
+       for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
+               if (patch_info->type == MONO_PATCH_INFO_EXC) {
+                       i = exception_id_by_name (patch_info->data.target);
+                       if (!exc_throw_found [i]) {
+                               max_epilog_size += 12;
+                               exc_throw_found [i] = TRUE;
+                       }
+               } else if (patch_info->type == MONO_PATCH_INFO_BB_OVF)
+                       max_epilog_size += 12;
+               else if (patch_info->type == MONO_PATCH_INFO_EXC_OVF)
+                       max_epilog_size += 12;
+       }
+
+       while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
+               cfg->code_size *= 2;
+               cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+               mono_jit_stats.code_reallocs++;
+       }
+
+       code = cfg->native_code + cfg->code_len;
+
        /* add code to raise exceptions */
        for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
                switch (patch_info->type) {
@@ -3840,18 +4055,24 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                        ppc_patch (code - 4, ip + 4); /* jump back after the initiali branch */
                        /* jump back to the true target */
                        ppc_b (code, 0);
-                       ip = (char*)ovfj->ip + 4;
+                       ip = cfg->native_code + ovfj->ip_offset + 4;
                        ppc_patch (code - 4, ip);
                        break;
                }
                case MONO_PATCH_INFO_EXC: {
                        unsigned char *ip = patch_info->ip.i + cfg->native_code;
+                       i = exception_id_by_name (patch_info->data.target);
+                       if (exc_throw_pos [i]) {
+                               ppc_patch (ip, exc_throw_pos [i]);
+                               patch_info->type = MONO_PATCH_INFO_NONE;
+                               break;
+                       } else {
+                               exc_throw_pos [i] = code;
+                       }
                        ppc_patch (ip, code);
                        /*mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);*/
                        ppc_load (code, ppc_r3, patch_info->data.target);
-                       /* simulate a call from ip */
-                       ppc_load (code, ppc_r0, ip + 4);
-                       ppc_mtlr (code, ppc_r0);
+                       /* we got here from a conditional call, so the calling ip is set in lr already */
                        patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
                        patch_info->data.name = "mono_arch_throw_exception_by_name";
                        patch_info->ip.i = code - cfg->native_code;
@@ -3870,9 +4091,145 @@ mono_arch_emit_epilog (MonoCompile *cfg)
 
 }
 
+/*
+ * try_offset_access:
+ * Check that slot idx of the two-level table found 284 bytes past the
+ * address held in r2 contains value. Slots are grouped 32 to a row.
+ * Returns 1 on a match, 0 when the row pointer is NULL or the slot holds
+ * something else.
+ */
+static int
+try_offset_access (void *value, guint32 idx)
+{
+       register void* me __asm__ ("r2");
+       void ***rows = (void***)((char*)me + 284);
+       int row_idx = idx / 32;
+       int slot_idx = idx % 32;
+       void **row = rows [row_idx];
+
+       return row && row [slot_idx] == value;
+}
+/*
+ * setup_tls_access:
+ * Work out how thread local storage can be read inline and cache the result
+ * in the file-level state:
+ *   - tls_mode is chosen by comparing the first instructions of
+ *     pthread_getspecific with known instruction patterns;
+ *   - monodomain_key, lmf_pthread_key and monothread_key receive the pthread
+ *     keys of the domain, JIT TLS and thread slots, each only while it is
+ *     still -1 and only when the key is below 1024.
+ * Returns at once when an earlier attempt failed; setting the MONO_NO_TLS
+ * environment variable forces the failed state.
+ */
+static void
+setup_tls_access (void)
+{
+       guint32 ptk;
+       guint32 *ins, *code;
+       guint32 cmplwi_1023, li_0x48, blr_ins;
+       if (tls_mode == TLS_MODE_FAILED)
+               return;
+
+       if (g_getenv ("MONO_NO_TLS")) {
+               tls_mode = TLS_MODE_FAILED;
+               return;
+       }
+
+       if (tls_mode == TLS_MODE_DETECT) {
+               ins = (guint32*)pthread_getspecific;
+               /* uncond branch to the real method */
+               if ((*ins >> 26) == 18) {
+                       gint32 val;
+                       val = (*ins & ~3) << 6; /* clear the two low flag bits and shift out the 6 opcode bits */
+                       val >>= 6; /* shift back; sign extension relies on >> of a signed value being arithmetic */
+                       if (*ins & 2) {
+                               /* absolute */
+                               ins = (guint32*)val;
+                       } else {
+                               ins = (guint32*) ((char*)ins + val);
+                       }
+               }
+               code = &cmplwi_1023; /* the emit macros presumably store at *code: encode reference instructions into locals */
+               ppc_cmpli (code, 0, 0, ppc_r3, 1023);
+               code = &li_0x48;
+               ppc_li (code, ppc_r4, 0x48);
+               code = &blr_ins;
+               ppc_blr (code);
+               if (*ins == cmplwi_1023) {
+                       int found_lwz_284 = 0;
+                       for (ptk = 0; ptk < 20; ++ptk) { /* ptk is only a counter here: look at up to 20 more instructions */
+                               ++ins;
+                               if (!*ins || *ins == blr_ins)
+                                       break;
+                               if ((guint16)*ins == 284 && (*ins >> 26) == 32) { /* primary opcode 32 with displacement 284 */
+                                       found_lwz_284 = 1;
+                                       break;
+                               }
+                       }
+                       if (!found_lwz_284) {
+                               tls_mode = TLS_MODE_FAILED;
+                               return;
+                       }
+                       tls_mode = TLS_MODE_LTHREADS;
+               } else if (*ins == li_0x48) {
+                       ++ins;
+                       /* uncond branch to the real method */
+                       if ((*ins >> 26) == 18) {
+                               gint32 val;
+                               val = (*ins & ~3) << 6;
+                               val >>= 6;
+                               if (*ins & 2) {
+                                       /* absolute */
+                                       ins = (guint32*)val;
+                               } else {
+                                       ins = (guint32*) ((char*)ins + val);
+                               }
+                               code = &val; /* val is reused as scratch space for the next reference instruction */
+                               ppc_li (code, ppc_r0, 0x7FF2);
+                               if (ins [1] == val) {
+                                       /* Darwin on G4, implement */
+                                       tls_mode = TLS_MODE_FAILED;
+                                       return;
+                               } else {
+                                       code = &val;
+                                       ppc_mfspr (code, ppc_r3, 104);
+                                       if (ins [1] != val) {
+                                               tls_mode = TLS_MODE_FAILED;
+                                               return;
+                                       }
+                                       tls_mode = TLS_MODE_DARWIN_G5;
+                               }
+                       } else {
+                               tls_mode = TLS_MODE_FAILED;
+                               return;
+                       }
+               } else {
+                       tls_mode = TLS_MODE_FAILED;
+                       return;
+               }
+       }
+       if (monodomain_key == -1) {
+               ptk = mono_domain_get_tls_key ();
+               if (ptk < 1024) {
+                       ptk = mono_pthread_key_for_tls (ptk);
+                       if (ptk < 1024) {
+                               monodomain_key = ptk;
+                       }
+               }
+       }
+       if (lmf_pthread_key == -1) {
+               ptk = mono_pthread_key_for_tls (mono_jit_tls_id);
+               if (ptk < 1024) {
+                       /*g_print ("MonoLMF at: %d\n", ptk);*/
+                       /*if (!try_offset_access (mono_get_lmf_addr (), ptk)) {
+                               init_tls_failed = 1;
+                               return;
+                       }*/
+                       lmf_pthread_key = ptk;
+               }
+       }
+       if (monothread_key == -1) {
+               ptk = mono_thread_get_tls_key ();
+               if (ptk < 1024) {
+                       ptk = mono_pthread_key_for_tls (ptk);
+                       if (ptk < 1024) {
+                               monothread_key = ptk;
+                               /*g_print ("thread inited: %d\n", ptk);*/
+                       }
+               } else {
+                       /*g_print ("thread not inited yet %d\n", ptk);*/
+               }
+       }
+}
+
 void
 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
 {
+       setup_tls_access (); /* probe the TLS layout and resolve the TLS keys; tls itself is not used here */
 }
 
 void
@@ -3911,18 +4268,19 @@ mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_re
 MonoInst*
 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
 {
-       /*
        MonoInst *ins = NULL;
 
-       if (cmethod->klass == mono_defaults.math_class) {
+       if (cmethod->klass == mono_defaults.thread_class &&
+                       strcmp (cmethod->name, "MemoryBarrier") == 0) {
+               MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
+       }
+       /*if (cmethod->klass == mono_defaults.math_class) {
                if (strcmp (cmethod->name, "Sqrt") == 0) {
                        MONO_INST_NEW (cfg, ins, OP_SQRT);
                        ins->inst_i0 = args [0];
                }
-       }
+       }*/
        return ins;
-       */
-       return NULL;
 }
 
 gboolean
@@ -3933,10 +4291,28 @@ mono_arch_print_tree (MonoInst *tree, int arity)
 
 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
 {
-       return NULL;
+       MonoInst* ins;
+       /* Run the TLS detection first: it is what fills in monodomain_key.
+        * While the key is still -1 no OP_TLS_GET can be built, so the
+        * caller gets NULL. */
+       setup_tls_access ();
+       if (monodomain_key == -1)
+               return NULL;
+       
+       MONO_INST_NEW (cfg, ins, OP_TLS_GET);
+       ins->inst_offset = monodomain_key; /* key later handed to emit_tls_access by the OP_TLS_GET case */
+       return ins;
 }
 
-MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
+MonoInst* 
+mono_arch_get_thread_intrinsic (MonoCompile* cfg)
 {
-       return NULL;
+       MonoInst* ins;
+       /* Run the TLS detection first: it is what fills in monothread_key.
+        * While the key is still -1 no OP_TLS_GET can be built, so the
+        * caller gets NULL. */
+       setup_tls_access ();
+       if (monothread_key == -1)
+               return NULL;
+       
+       MONO_INST_NEW (cfg, ins, OP_TLS_GET);
+       ins->inst_offset = monothread_key; /* key later handed to emit_tls_access by the OP_TLS_GET case */
+       return ins;
 }
+