2006-06-20 Jb Evain <jbevain@gmail.com>
[mono.git] / mono / mini / mini-ppc.c
index d00e3bac91048de7d2af7dc0367c4f06c0ef937b..d467bd333019b19a0bb875bb04ed5bfd126b3dd8 100644 (file)
 #include "cpu-g4.h"
 #include "trace.h"
 
+enum { /* values stored in tls_mode; emit_tls_access switches on them */
+       TLS_MODE_DETECT, /* initial value of tls_mode; no case in emit_tls_access */
+       TLS_MODE_FAILED, /* no case in emit_tls_access: reaches g_assert_not_reached */
+       TLS_MODE_LTHREADS, /* emit_tls_access expands emit_linuxthreads_tls */
+       TLS_MODE_NPTL, /* no case in emit_tls_access: reaches g_assert_not_reached */
+       TLS_MODE_DARWIN_G4, /* emit_tls_access expands emit_darwing4_tls */
+       TLS_MODE_DARWIN_G5 /* emit_tls_access expands emit_darwing5_tls */
+};
+
 int mono_exc_esp_offset = 0;
+static int tls_mode = TLS_MODE_DETECT; /* one of the TLS_MODE_* values; read by emit_tls_access */
+static int lmf_pthread_key = -1; /* NOTE(review): presumably -1 means "key not looked up yet" -- confirm */
+static int monothread_key = -1; /* NOTE(review): same -1 convention assumed -- confirm */
+static int monodomain_key = -1; /* NOTE(review): same -1 convention assumed -- confirm */
+
+static int /* returns the first displacement; the second one is stored through offset2 */
+offsets_from_pthread_key (guint32 key, int *offset2)
+{
+       int idx1 = key / 32; /* which group of 32 keys the key falls in */
+       int idx2 = key % 32; /* position of the key inside that group */
+       *offset2 = idx2 * sizeof (gpointer); /* idx2 scaled by the pointer size; emit_linuxthreads_tls applies it to the pointer it loaded first */
+       return 284 + idx1 * sizeof (gpointer); /* emit_linuxthreads_tls applies this to r2; NOTE(review): 284 is presumably a fixed offset inside the linuxthreads thread descriptor -- confirm */
+}
+
+#define emit_linuxthreads_tls(code,dreg,key) do { /* emits two dependent lwz loads that leave the slot for key in dreg; no trailing ';' so the caller's own ';' ends the statement */ \
+               int off1, off2; /* off1 is used relative to r2, off2 relative to the pointer loaded from there */ \
+               off1 = offsets_from_pthread_key ((key), &off2); \
+               ppc_lwz ((code), (dreg), off1, ppc_r2); /* lwz dreg, off1(r2) */ \
+               ppc_lwz ((code), (dreg), off2, (dreg)); /* lwz dreg, off2(dreg) */ \
+       } while (0)
+
+#define emit_darwing5_tls(code,dreg,key) do { /* emits mfspr + lwz leaving the slot for key in dreg; key is parenthesised so expression arguments scale correctly; no trailing ';' */ \
+               int off1 = 0x48 + (key) * sizeof (gpointer);    /* slot displacement: 0x48 plus key pointer-sized entries */ \
+               ppc_mfspr ((code), (dreg), 104);        /* dreg = SPR 104; NOTE(review): presumably the per-thread base on G5 -- confirm */ \
+               ppc_lwz ((code), (dreg), off1, (dreg)); /* lwz dreg, off1(dreg) */ \
+       } while (0)
+
+/* FIXME: ensure the sc call preserves all but r3. The emitted code overwrites r0, and r11 whenever dreg is not r3. */
+#define emit_darwing4_tls(code,dreg,key) do { /* key is parenthesised so expression arguments scale correctly; no trailing ';' so the caller's own ';' ends the statement */ \
+               int off1 = 0x48 + (key) * sizeof (gpointer);    /* slot displacement: 0x48 plus key pointer-sized entries */ \
+               if ((dreg) != ppc_r3) ppc_mr ((code), ppc_r11, ppc_r3); /* park the caller's r3 in r11 */ \
+               ppc_li ((code), ppc_r0, 0x7FF2);        /* r0 = 0x7FF2 selects the call made by sc; NOTE(review): presumably returns the thread base in r3 -- confirm */ \
+               ppc_sc ((code));        \
+               ppc_lwz ((code), (dreg), off1, ppc_r3); /* lwz dreg, off1(r3) */ \
+               if ((dreg) != ppc_r3) ppc_mr ((code), ppc_r3, ppc_r11); /* put the caller's r3 back */ \
+       } while (0)
+
+#define emit_tls_access(code,dreg,key) do {    /* picks the TLS load sequence matching the current tls_mode */ \
+               switch (tls_mode) {     \
+               case TLS_MODE_LTHREADS: emit_linuxthreads_tls(code,dreg,key); break;    \
+               case TLS_MODE_DARWIN_G5: emit_darwing5_tls(code,dreg,key); break;       \
+               case TLS_MODE_DARWIN_G4: emit_darwing4_tls(code,dreg,key); break;       \
+               default: g_assert_not_reached ();       /* TLS_MODE_DETECT, TLS_MODE_FAILED and TLS_MODE_NPTL have no sequence here */ \
+               }       \
+       } while (0)
 
 const char*
 mono_arch_regname (int reg) {
@@ -36,11 +90,30 @@ mono_arch_regname (int reg) {
        return "unknown";
 }
 
-/* this function overwrites r0 */
-static guint32*
-emit_memcpy (guint32 *code, int size, int dreg, int doffset, int sreg, int soffset)
+/* this function overwrites r0, r11, r12 */
+static guint8*
+emit_memcpy (guint8 *code, int size, int dreg, int doffset, int sreg, int soffset)
 {
        /* unrolled, use the counter in big */
+       if (size > sizeof (gpointer) * 5) {
+               int shifted = size >> 2;
+               guint8 *copy_loop_start, *copy_loop_jump;
+
+               ppc_load (code, ppc_r0, shifted);
+               ppc_mtctr (code, ppc_r0);
+               g_assert (sreg == ppc_r11);
+               ppc_addi (code, ppc_r12, dreg, (doffset - 4));
+               ppc_addi (code, ppc_r11, sreg, (soffset - 4));
+               copy_loop_start = code;
+               ppc_lwzu (code, ppc_r0, ppc_r11, 4);
+               ppc_stwu (code, ppc_r0, 4, ppc_r12);
+               copy_loop_jump = code;
+               ppc_bc (code, PPC_BR_DEC_CTR_NONZERO, 0, 0);
+               ppc_patch (copy_loop_jump, copy_loop_start);
+               size -= shifted * 4;
+               doffset = soffset = 0;
+               dreg = ppc_r12;
+       }
        while (size >= 4) {
                ppc_lwz (code, ppc_r0, soffset, sreg);
                ppc_stw (code, ppc_r0, doffset, dreg);
@@ -141,7 +214,7 @@ mono_arch_cpu_optimizazions (guint32 *exclude_mask)
        guint32 opts = 0;
 
        /* no ppc-specific optimizations yet */
-       *exclude_mask = MONO_OPT_INLINE|MONO_OPT_LINEARS;
+       *exclude_mask = 0;
        return opts;
 }
 
@@ -149,21 +222,26 @@ static gboolean
 is_regsize_var (MonoType *t) {
        if (t->byref)
                return TRUE;
+       t = mono_type_get_underlying_type (t);
        switch (t->type) {
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
+       case MONO_TYPE_PTR:
+       case MONO_TYPE_FNPTR:
                return TRUE;
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_STRING:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_ARRAY:
+               return TRUE;
+       case MONO_TYPE_GENERICINST:
+               if (!mono_type_generic_inst_is_valuetype (t))
+                       return TRUE;
                return FALSE;
        case MONO_TYPE_VALUETYPE:
-               if (t->data.klass->enumtype)
-                       return is_regsize_var (t->data.klass->enum_basetype);
                return FALSE;
        }
        return FALSE;
@@ -205,10 +283,10 @@ mono_arch_get_global_int_regs (MonoCompile *cfg)
 {
        GList *regs = NULL;
        int i, top = 32;
-       if (cfg->flags & MONO_CFG_HAS_ALLOCA)
+       if (cfg->frame_reg != ppc_sp)
                top = 31;
 #if USE_EXTRA_TEMPS
-       top -= 2;
+       top = 29;
 #endif
        for (i = 13; i < top; ++i)
                regs = g_list_prepend (regs, GUINT_TO_POINTER (i));
@@ -227,7 +305,7 @@ guint32
 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
 {
        /* FIXME: */
-       return 3;
+       return 2; /* constant cost: neither cfg nor vmv is consulted */
 }
 
 // code from ppc/tramp.c, try to keep in sync
@@ -240,8 +318,15 @@ mono_arch_flush_icache (guint8 *code, gint size)
        guint8 *p;
 
        p = code;
-       for (i = 0; i < size; i += MIN_CACHE_LINE, p += MIN_CACHE_LINE) {
-               asm ("dcbst 0,%0;" : : "r"(p) : "memory");
+       /* use dcbf for smp support, later optimize for UP, see pem._64bit.d20030611.pdf page 211 */
+       if (1) {
+               for (i = 0; i < size; i += MIN_CACHE_LINE, p += MIN_CACHE_LINE) {
+                       asm ("dcbf 0,%0;" : : "r"(p) : "memory");
+               }
+       } else {
+               for (i = 0; i < size; i += MIN_CACHE_LINE, p += MIN_CACHE_LINE) {
+                       asm ("dcbst 0,%0;" : : "r"(p) : "memory");
+               }
        }
        asm ("sync");
        p = code;
@@ -260,7 +345,7 @@ mono_arch_flush_icache (guint8 *code, gint size)
 #define FP_ALSO_IN_REG(s) s
 #else
 #define ALWAYS_ON_STACK(s)
-#define FP_ALSO_IN_REG(s) s
+#define FP_ALSO_IN_REG(s)
 #define ALIGN_DOUBLES
 #endif
 
@@ -285,6 +370,7 @@ typedef struct {
        guint32 stack_usage;
        guint32 struct_ret;
        ArgInfo ret;
+       ArgInfo sig_cookie;
        ArgInfo args [1];
 } CallInfo;
 
@@ -325,6 +411,25 @@ add_general (guint *gr, guint *stack_size, ArgInfo *ainfo, gboolean simple)
        (*gr) ++;
 }
 
+#if __APPLE__
+/* Returns TRUE when the first non-static field of klass is a non-byref MONO_TYPE_R4, FALSE otherwise (also when there is no such field). size == 4 is checked already */
+static gboolean
+has_only_a_r4_field (MonoClass *klass)
+{
+       gpointer iter;
+       MonoClassField *f;
+       iter = NULL; /* iteration cursor handed to mono_class_get_fields on every call */
+       while ((f = mono_class_get_fields (klass, &iter))) {
+               if (!(f->type->attrs & FIELD_ATTRIBUTE_STATIC)) { /* static fields are skipped */
+                       if (!f->type->byref && f->type->type == MONO_TYPE_R4)
+                               return TRUE;
+                       return FALSE; /* only the first non-static field is examined; NOTE(review): presumably the size == 4 precondition rules out further instance fields -- confirm */
+               }
+       }
+       return FALSE; /* no non-static field was found */
+}
+#endif
+
 static CallInfo*
 calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
 {
@@ -350,6 +455,13 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
        }
         DEBUG(printf("params: %d\n", sig->param_count));
        for (i = 0; i < sig->param_count; ++i) {
+               if ((sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
+                        /* Prevent implicit arguments and sig_cookie from
+                          being passed in registers */
+                        gr = PPC_LAST_ARG_REG + 1;
+                        /* Emit the signature cookie just before the implicit arguments */
+                        add_general (&gr, &stack_size, &cinfo->sig_cookie, TRUE);
+                }
                 DEBUG(printf("param %d: ", i));
                if (sig->params [i]->byref) {
                         DEBUG(printf("byref\n"));
@@ -357,8 +469,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        n++;
                        continue;
                }
-               simpletype = sig->params [i]->type;
-       enum_calc_size:
+               simpletype = mono_type_get_underlying_type (sig->params [i])->type;
                switch (simpletype) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
@@ -393,29 +504,63 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        add_general (&gr, &stack_size, cinfo->args + n, TRUE);
                        n++;
                        break;
+               case MONO_TYPE_GENERICINST:
+                       if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
+                               cinfo->args [n].size = sizeof (gpointer);
+                               add_general (&gr, &stack_size, cinfo->args + n, TRUE);
+                               n++;
+                               break;
+                       }
+                       /* Fall through */
                case MONO_TYPE_VALUETYPE: {
                        gint size;
-                       if (sig->params [i]->data.klass->enumtype) {
-                               simpletype = sig->params [i]->data.klass->enum_basetype->type;
-                               goto enum_calc_size;
+                       MonoClass *klass;
+                       klass = mono_class_from_mono_type (sig->params [i]);
+                       if (is_pinvoke)
+                           size = mono_class_native_size (klass, NULL);
+                       else
+                           size = mono_class_value_size (klass, NULL);
+#if __APPLE__
+                       if (size == 4 && has_only_a_r4_field (klass)) {
+                               cinfo->args [n].size = 4;
+
+                               /* It was 7, now it is 8 in LinuxPPC */
+                               if (fr <= PPC_LAST_FPARG_REG) {
+                                       cinfo->args [n].regtype = RegTypeFP;
+                                       cinfo->args [n].reg = fr;
+                                       fr ++;
+                                       FP_ALSO_IN_REG (gr ++);
+                                       ALWAYS_ON_STACK (stack_size += 4);
+                               } else {
+                                       cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
+                                       cinfo->args [n].regtype = RegTypeBase;
+                                       cinfo->args [n].reg = ppc_sp; /* in the caller*/
+                                       stack_size += 4;
+                               }
+                               n++;
+                               break;
                        }
-                       size = mono_class_value_size (sig->params [i]->data.klass, NULL);
+#endif
                        DEBUG(printf ("load %d bytes struct\n",
-                                     mono_class_value_size (sig->params [i]->data.klass, NULL)));
+                                     mono_class_native_size (sig->params [i]->data.klass, NULL)));
 #if PPC_PASS_STRUCTS_BY_VALUE
                        {
-                               int nwords = (size + sizeof (gpointer) -1 ) / sizeof (gpointer);
+                               int align_size = size;
+                               int nwords = 0;
+                               align_size += (sizeof (gpointer) - 1);
+                               align_size &= ~(sizeof (gpointer) - 1);
+                               nwords = (align_size + sizeof (gpointer) -1 ) / sizeof (gpointer);
                                cinfo->args [n].regtype = RegTypeStructByVal;
-                               if (gr <= PPC_LAST_ARG_REG) {
+                               if (gr > PPC_LAST_ARG_REG || (size >= 3 && size % 4 != 0)) {
+                                       cinfo->args [n].size = 0;
+                                       cinfo->args [n].vtsize = nwords;
+                               } else {
                                        int rest = PPC_LAST_ARG_REG - gr + 1;
                                        int n_in_regs = rest >= nwords? nwords: rest;
                                        cinfo->args [n].size = n_in_regs;
                                        cinfo->args [n].vtsize = nwords - n_in_regs;
                                        cinfo->args [n].reg = gr;
                                        gr += n_in_regs;
-                               } else {
-                                       cinfo->args [n].size = 0;
-                                       cinfo->args [n].vtsize = nwords;
                                }
                                cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
                                /*g_print ("offset for arg %d at %d\n", n, PPC_STACK_PARAM_OFFSET + stack_size);*/
@@ -504,7 +649,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
        }
 
        {
-               simpletype = sig->ret->type;
+               simpletype = mono_type_get_underlying_type (sig->ret)->type;
 enum_retvalue:
                switch (simpletype) {
                case MONO_TYPE_BOOLEAN:
@@ -535,12 +680,14 @@ enum_retvalue:
                        cinfo->ret.reg = ppc_f1;
                        cinfo->ret.regtype = RegTypeFP;
                        break;
-               case MONO_TYPE_VALUETYPE:
-                       if (sig->ret->data.klass->enumtype) {
-                               simpletype = sig->ret->data.klass->enum_basetype->type;
-                               goto enum_retvalue;
+               case MONO_TYPE_GENERICINST:
+                       if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
+                               cinfo->ret.reg = ppc_r3;
+                               break;
                        }
                        break;
+               case MONO_TYPE_VALUETYPE:
+                       break;
                case MONO_TYPE_TYPEDBYREF:
                case MONO_TYPE_VOID:
                        break;
@@ -571,26 +718,41 @@ mono_arch_allocate_vars (MonoCompile *m)
        int i, offset, size, align, curinst;
        int frame_reg = ppc_sp;
 
+
        /* allow room for the vararg method args: void* and long/double */
        if (mono_jit_trace_calls != NULL && mono_trace_eval (m->method))
-               m->param_area = MAX (m->param_area, 16);
+               m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
+       /* this is bug #60332: remove when #59509 is fixed, so no weird vararg 
+        * call convs needs to be handled this way.
+        */
+       if (m->flags & MONO_CFG_HAS_VARARGS)
+               m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
+       /* gtk-sharp and other broken code will dllimport vararg functions even with
+        * non-varargs signatures. Since there is little hope people will get this right
+        * we assume they won't.
+        */
+       if (m->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE)
+               m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
+
+       header = mono_method_get_header (m->method);
 
        /* 
-        * FIXME: we'll use the frame register also for any method that has
-        * filter clauses. This way, when the handlers are called,
+        * We use the frame register also for any method that has
+        * exception clauses. This way, when the handlers are called,
         * the code will reference local variables using the frame reg instead of
         * the stack pointer: if we had to restore the stack pointer, we'd
         * corrupt the method frames that are already on the stack (since
         * filters get called before stack unwinding happens) when the filter
-        * code would call any method.
+        * code would call any method (this also applies to finally etc.).
         */ 
-       if (m->flags & MONO_CFG_HAS_ALLOCA)
+       if ((m->flags & MONO_CFG_HAS_ALLOCA) || header->num_clauses)
                frame_reg = ppc_r31;
        m->frame_reg = frame_reg;
+       if (frame_reg != ppc_sp) {
+               m->used_int_regs |= 1 << frame_reg;
+       }
 
-       header = ((MonoMethodNormal *)m->method)->header;
-
-       sig = m->method->signature;
+       sig = mono_method_signature (m->method);
        
        offset = 0;
        curinst = 0;
@@ -599,7 +761,7 @@ mono_arch_allocate_vars (MonoCompile *m)
                m->ret->inst_c0 = ppc_r3;
        } else {
                /* FIXME: handle long and FP values */
-               switch (sig->ret->type) {
+               switch (mono_type_get_underlying_type (sig->ret)->type) {
                case MONO_TYPE_VOID:
                        break;
                default:
@@ -623,6 +785,10 @@ mono_arch_allocate_vars (MonoCompile *m)
        offset += 16 - 1;
        offset &= ~(16 - 1);
 
+       /* allow room to save the return value */
+       if (mono_jit_trace_calls != NULL && mono_trace_eval (m->method))
+               offset += 8;
+
        /* the MonoLMF structure is stored just below the stack pointer */
 
 #if 0
@@ -637,6 +803,9 @@ mono_arch_allocate_vars (MonoCompile *m)
        /* this is a global constant */
        mono_exc_esp_offset = offset;
 #endif
+       if (sig->call_convention == MONO_CALL_VARARG) {
+                m->sig_cookie = PPC_STACK_PARAM_OFFSET;
+        }
 
        if (MONO_TYPE_ISSTRUCT (sig->ret)) {
                inst = m->ret;
@@ -646,17 +815,20 @@ mono_arch_allocate_vars (MonoCompile *m)
                inst->opcode = OP_REGOFFSET;
                inst->inst_basereg = frame_reg;
                offset += sizeof(gpointer);
+               if (sig->call_convention == MONO_CALL_VARARG)
+                       m->sig_cookie += sizeof (gpointer);
        }
+
        curinst = m->locals_start;
        for (i = curinst; i < m->num_varinfo; ++i) {
                inst = m->varinfo [i];
-               if (inst->opcode == OP_REGVAR)
+               if ((inst->flags & MONO_INST_IS_DEAD) || inst->opcode == OP_REGVAR)
                        continue;
 
                /* inst->unused indicates native sized value types, this is used by the
                * pinvoke wrappers when they call functions returning structure */
-               if (inst->unused && MONO_TYPE_ISSTRUCT (inst->inst_vtype))
-                       size = mono_class_native_size (inst->inst_vtype->data.klass, &align);
+               if (inst->unused && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF)
+                       size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), &align);
                else
                        size = mono_type_size (inst->inst_vtype, &align);
 
@@ -679,6 +851,8 @@ mono_arch_allocate_vars (MonoCompile *m)
                        offset &= ~(sizeof (gpointer) - 1);
                        inst->inst_offset = offset;
                        offset += sizeof (gpointer);
+                       if (sig->call_convention == MONO_CALL_VARARG)
+                               m->sig_cookie += sizeof (gpointer);
                }
                curinst++;
        }
@@ -693,6 +867,8 @@ mono_arch_allocate_vars (MonoCompile *m)
                        offset &= ~(align - 1);
                        inst->inst_offset = offset;
                        offset += size;
+                       if ((sig->call_convention == MONO_CALL_VARARG) && (i < sig->sentinelpos)) 
+                               m->sig_cookie += size;
                }
                curinst++;
        }
@@ -721,8 +897,7 @@ MonoCallInst*
 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
        MonoInst *arg, *in;
        MonoMethodSignature *sig;
-       int i, n, type;
-       MonoType *ptype;
+       int i, n;
        CallInfo *cinfo;
        ArgInfo *ainfo;
 
@@ -735,6 +910,21 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
 
        for (i = 0; i < n; ++i) {
                ainfo = cinfo->args + i;
+               if ((sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
+                       MonoInst *sig_arg;
+                       cfg->disable_aot = TRUE;
+                               
+                       MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
+                       sig_arg->inst_p0 = call->signature;
+                       
+                       MONO_INST_NEW (cfg, arg, OP_OUTARG);
+                       arg->inst_imm = cinfo->sig_cookie.offset;
+                       arg->inst_left = sig_arg;
+                       
+                       /* prepend, so they get reversed */
+                       arg->next = call->out_args;
+                       call->out_args = arg;
+               }
                if (is_virtual && i == 0) {
                        /* the argument will be attached to the call instrucion */
                        in = call->args [i];
@@ -816,18 +1006,6 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
  * Allow tracing to work with this interface (with an optional argument)
  */
 
-/*
- * This may be needed on some archs or for debugging support.
- */
-void
-mono_arch_instrument_mem_needs (MonoMethod *method, int *stack, int *code)
-{
-       /* no stack room needed now (may be needed for FASTCALL-trace support) */
-       *stack = 0;
-       /* split prolog-epilog requirements? */
-       *code = 50; /* max bytes needed: check this number */
-}
-
 void*
 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
 {
@@ -854,9 +1032,20 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena
 {
        guchar *code = p;
        int save_mode = SAVE_NONE;
+       int offset;
        MonoMethod *method = cfg->method;
-       int rtype = method->signature->ret->type;
+       int rtype = mono_type_get_underlying_type (mono_method_signature (method)->ret)->type;
+       int save_offset = PPC_STACK_PARAM_OFFSET + cfg->param_area;
+       save_offset += 15;
+       save_offset &= ~15;
        
+       offset = code - cfg->native_code;
+       /* we need about 16 instructions */
+       if (offset > (cfg->code_size - 16 * 4)) {
+               cfg->code_size *= 2;
+               cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+               code = cfg->native_code + offset;
+       }
 handle_enum:
        switch (rtype) {
        case MONO_TYPE_VOID:
@@ -875,10 +1064,6 @@ handle_enum:
                save_mode = SAVE_FP;
                break;
        case MONO_TYPE_VALUETYPE:
-               if (method->signature->ret->data.klass->enumtype) {
-                       rtype = method->signature->ret->data.klass->enum_basetype->type;
-                       goto handle_enum;
-               }
                save_mode = SAVE_STRUCT;
                break;
        default:
@@ -888,26 +1073,26 @@ handle_enum:
 
        switch (save_mode) {
        case SAVE_TWO:
-               ppc_stw (code, ppc_r3, cfg->stack_usage - 8, cfg->frame_reg);
-               ppc_stw (code, ppc_r4, cfg->stack_usage - 4, cfg->frame_reg);
+               ppc_stw (code, ppc_r3, save_offset, cfg->frame_reg);
+               ppc_stw (code, ppc_r4, save_offset + 4, cfg->frame_reg);
                if (enable_arguments) {
                        ppc_mr (code, ppc_r5, ppc_r4);
                        ppc_mr (code, ppc_r4, ppc_r3);
                }
                break;
        case SAVE_ONE:
-               ppc_stw (code, ppc_r3, cfg->stack_usage - 8, cfg->frame_reg);
+               ppc_stw (code, ppc_r3, save_offset, cfg->frame_reg);
                if (enable_arguments) {
                        ppc_mr (code, ppc_r4, ppc_r3);
                }
                break;
        case SAVE_FP:
-               ppc_stfd (code, ppc_f1, cfg->stack_usage - 8, cfg->frame_reg);
+               ppc_stfd (code, ppc_f1, save_offset, cfg->frame_reg);
                if (enable_arguments) {
                        /* FIXME: what reg?  */
                        ppc_fmr (code, ppc_f3, ppc_f1);
-                       ppc_lwz (code, ppc_r4, cfg->stack_usage - 8, cfg->frame_reg);
-                       ppc_lwz (code, ppc_r5, cfg->stack_usage - 4, cfg->frame_reg);
+                       ppc_lwz (code, ppc_r4, save_offset, cfg->frame_reg);
+                       ppc_lwz (code, ppc_r5, save_offset + 4, cfg->frame_reg);
                }
                break;
        case SAVE_STRUCT:
@@ -928,14 +1113,14 @@ handle_enum:
 
        switch (save_mode) {
        case SAVE_TWO:
-               ppc_lwz (code, ppc_r3, cfg->stack_usage - 8, cfg->frame_reg);
-               ppc_lwz (code, ppc_r4, cfg->stack_usage - 4, cfg->frame_reg);
+               ppc_lwz (code, ppc_r3, save_offset, cfg->frame_reg);
+               ppc_lwz (code, ppc_r4, save_offset + 4, cfg->frame_reg);
                break;
        case SAVE_ONE:
-               ppc_lwz (code, ppc_r3, cfg->stack_usage - 8, cfg->frame_reg);
+               ppc_lwz (code, ppc_r3, save_offset, cfg->frame_reg);
                break;
        case SAVE_FP:
-               ppc_lfd (code, ppc_f1, cfg->stack_usage - 8, cfg->frame_reg);
+               ppc_lfd (code, ppc_f1, save_offset, cfg->frame_reg);
                break;
        case SAVE_NONE:
        default:
@@ -953,8 +1138,11 @@ handle_enum:
  * going to be perf critical anyway.
  */
 typedef struct {
-       MonoBasicBlock *bb;
-       void *ip;
+       union { /* which member is valid depends on the patch type the record is registered with */
+               MonoBasicBlock *bb; /* set from ins->inst_true_bb for MONO_PATCH_INFO_BB_OVF */
+               const char *exception; /* set from exc_name for MONO_PATCH_INFO_EXC_OVF */
+       } data;
+       guint32 ip_offset; /* code - cfg->native_code at the emission site for EXC_OVF; 0 for BB_OVF */
        guint16 b0_cond;
        guint16 b1_cond;
 } MonoOvfJump;
@@ -974,8 +1162,8 @@ if (ins->flags & MONO_INST_BRLABEL) { \
                int br_disp = ins->inst_true_bb->max_offset - offset;   \
                if (!ppc_is_imm16 (br_disp + 1024) || ! ppc_is_imm16 (ppc_is_imm16 (br_disp - 1024))) { \
                        MonoOvfJump *ovfj = mono_mempool_alloc (cfg->mempool, sizeof (MonoOvfJump));    \
-                       ovfj->bb = ins->inst_true_bb;   \
-                       ovfj->ip = NULL;        \
+                       ovfj->data.bb = ins->inst_true_bb;      \
+                       ovfj->ip_offset = 0;    \
                        ovfj->b0_cond = (b0);   \
                        ovfj->b1_cond = (b1);   \
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB_OVF, ovfj); \
@@ -999,18 +1187,17 @@ if (ins->flags & MONO_INST_BRLABEL) { \
                int br_disp = cfg->bb_exit->max_offset - offset;        \
                if (!ppc_is_imm16 (br_disp + 1024) || ! ppc_is_imm16 (ppc_is_imm16 (br_disp - 1024))) { \
                        MonoOvfJump *ovfj = mono_mempool_alloc (cfg->mempool, sizeof (MonoOvfJump));    \
-                       ovfj->bb = NULL;        \
-                       ovfj->ip = code;        \
+                       ovfj->data.exception = (exc_name);      \
+                       ovfj->ip_offset = code - cfg->native_code;      \
                        ovfj->b0_cond = (b0);   \
                        ovfj->b1_cond = (b1);   \
-                       /* FIXME: test this code */     \
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_OVF, ovfj); \
-                       ppc_b (code, 0);        \
+                       ppc_bl (code, 0);       \
                        cfg->bb_exit->max_offset += 24; \
                } else {        \
                        mono_add_patch_info (cfg, code - cfg->native_code,   \
                                    MONO_PATCH_INFO_EXC, exc_name);  \
-                       ppc_bc (code, (b0), (b1), 0);   \
+                       ppc_bcl (code, (b0), (b1), 0);  \
                }       \
        } while (0); 
 
@@ -1035,6 +1222,12 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                                        ins = ins->next;                                
                                        continue;
                                }
+                       } else {
+                               int power2 = mono_is_power_of_two (ins->inst_imm);
+                               if (power2 > 0) {
+                                       ins->opcode = OP_SHL_IMM;
+                                       ins->inst_imm = power2;
+                               }
                        }
                        break;
                case OP_LOAD_MEMBASE:
@@ -1136,6 +1329,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                case CEE_CONV_I4:
                case CEE_CONV_U4:
                case OP_MOVE:
+               case OP_SETREG:
                        ins->opcode = OP_MOVE;
                        /* 
                         * OP_MOVE reg, reg 
@@ -1274,7 +1468,11 @@ mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
 #undef DEBUG
 #define DEBUG(a) if (cfg->verbose_level > 1) a
 //#define DEBUG(a)
-#define reg_is_freeable(r) ((r) >= 3 && (r) <= 10)
+/* use ppc_r3-ppc_r10,ppc_r12 as temp registers, f1-f13 for FP registers */
+#define PPC_CALLER_REGS ((0xff<<3) | (1<<12) | USE_EXTRA_TEMPS) /* bits 3..10 and bit 12, plus whatever mask USE_EXTRA_TEMPS expands to */
+#define PPC_CALLER_FREGS (0x3ffe) /* bits 1..13 */
+
+#define reg_is_freeable(r) (PPC_CALLER_REGS & 1 << (r)) /* non-zero when bit r is set in PPC_CALLER_REGS; << binds tighter than &, so this is PPC_CALLER_REGS & (1 << (r)) */
 #define freg_is_freeable(r) ((r) >= 1 && (r) <= 13)
 
 typedef struct {
@@ -1609,10 +1807,6 @@ alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg,
        return val;
 }
 
-/* use ppc_r3-ppc_10,ppc_r12 as temp registers, f1-f13 for FP registers */
-#define PPC_CALLER_REGS ((0xff<<3) | (1<<12) | USE_EXTRA_TEMPS)
-#define PPC_CALLER_FREGS (0x3ffe)
-
 /*
  * Local register allocation.
  * We first scan the list of instructions and we save the liveness info of
@@ -1650,10 +1844,10 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
        while (ins) {
                spec = ins_spec [ins->opcode];
                DEBUG (print_ins (i, ins));
-               if (spec [MONO_INST_CLOB] == 'c') {
+               /*if (spec [MONO_INST_CLOB] == 'c') {
                        MonoCallInst * call = (MonoCallInst*)ins;
                        int j;
-               }
+               }*/
                if (spec [MONO_INST_SRC1]) {
                        if (spec [MONO_INST_SRC1] == 'f')
                                reginfo1 = reginfof;
@@ -1763,8 +1957,8 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                        } else {
                                prev_dreg = -1;
                        }
-                       if (freg_is_freeable (ins->dreg) && prev_dreg >= 0 && (reginfo [prev_dreg].born_in >= i || !(cur_fregs & (1 << ins->dreg)))) {
-                               DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
+                       if (freg_is_freeable (ins->dreg) && prev_dreg >= 0 && (reginfof [prev_dreg].born_in >= i || !(cur_fregs & (1 << ins->dreg)))) {
+                               DEBUG (g_print ("\tfreeable float %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfof [prev_dreg].born_in));
                                mono_regstate_free_float (rs, ins->dreg);
                        }
                } else if (ins->dreg >= MONO_MAX_IREGS) {
@@ -1840,10 +2034,10 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                        prev_dreg = -1;
                }
                if (spec [MONO_INST_DEST] == 'f' && freg_is_freeable (ins->dreg) && prev_dreg >= 0 && (reginfof [prev_dreg].born_in >= i)) {
-                       DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfof [prev_dreg].born_in));
+                       DEBUG (g_print ("\tfreeable float %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfof [prev_dreg].born_in));
                        mono_regstate_free_float (rs, ins->dreg);
                } else if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && (reginfo [prev_dreg].born_in >= i)) {
-                       DEBUG (g_print ("\tfreeable float %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
+                       DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
                        mono_regstate_free_int (rs, ins->dreg);
                }
                if (spec [MONO_INST_SRC1] == 'f') {
@@ -1982,6 +2176,7 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                //DEBUG (print_ins (i, ins));
                tmp = tmp->next;
        }
+       cfg->max_ireg = MAX (cfg->max_ireg, rs->max_ireg);
 }
 
 static guchar*
@@ -2065,17 +2260,13 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) {
        guint32 load [2];
        guchar *templ;
        int i, count = 0;
+       int difflow, diffhigh;
 
-       if (!pdata->absolute) {
-               g_assert (!is_call_imm (pdata->target - pdata->code));
-               /* make sure a jump is possible from the code to the thunk area */
-               i = pdata->code - code;
-               if (!is_call_imm (i))
-                       return 0;
-               i = pdata->code + csize - code;
-               if (!is_call_imm (i))
-                       return 0;
-       }
+       /* always ensure a call from pdata->code can reach the thunks without needing further thunks */
+       difflow = (char*)pdata->code - (char*)thunks;
+       diffhigh = (char*)pdata->code - (char*)endthunks;
+       if (!((is_call_imm (thunks) && is_call_imm (endthunks)) || (is_call_imm (difflow) && is_call_imm (diffhigh))))
+               return 0;
 
        templ = (guchar*)load;
        ppc_lis (templ, ppc_r0, (guint32)(pdata->target) >> 16);
@@ -2147,82 +2338,43 @@ ppc_patch (guchar *code, guchar *target)
 
        //g_print ("patching 0x%08x (0x%08x) to point to 0x%08x\n", code, ins, target);
        if (prim == 18) {
-               if (target >= 0){
-                       if (target < 33554431){
-                               ins = (18 << 26) | ((guint32) target) | (ins & 1) | 2;
-                               *(guint32*)code = ins;
-                               return;
-                       } 
-               } else {
-                       if (target > -33554432){
-                               ins = (18 << 26) | (((guint32)target) & 0xfc000000) | (ins & 1) | 2;
-                               *(guint32*)code = ins;
-                               return;
-                       }
-               }
-               
+               // prefer relative branches, they are more position independent (e.g. for AOT compilation).
                gint diff = target - code;
                if (diff >= 0){
-                       if (diff < 33554431){
+                       if (diff <= 33554431){
                                ins = (18 << 26) | (diff) | (ins & 1);
                                *(guint32*)code = ins;
                                return;
-                       } else {
-                               handle_thunk (TRUE, code, target);
-                               return;
                        }
                } else {
                        /* diff between 0 and -33554432 */
-                       if (diff > -33554432){
+                       if (diff >= -33554432){
                                ins = (18 << 26) | (diff & ~0xfc000000) | (ins & 1);
                                *(guint32*)code = ins;
                                return;
-                       } else {
-                               handle_thunk (TRUE, code, target);
-                               return;
                        }
                }
-               g_assert_not_reached ();
-       }
-       
-#if OLD_REFERENCE_CODE
-               // absolute address
-               if (ins & 2) {
-                       gint diff = (gint)target;
-                       if ((diff < -33554432) || (diff > 33554431)) {
-                               diff = target - code;
-                               if (is_call_imm (diff)) {
-                                       handle_thunk (TRUE, code, target);
-                                       return;
-                               }
-                               /* change it to relative */
-                               ins &= ~2;
-                       }
-                       ins = prim << 26 | (ins & 3);
-                       diff &= ~0xfc000003;
-                       ins |= diff;
-               } else {
-                       gint diff = target - code;
-                       if (is_call_imm (target)) {
-                               /* we change it into an absolute reference */
-                               ins = prim << 26 | (ins & 3) | 2;
-                               diff = (gint)target;
-                               diff &= ~0xfc000003;
-                               ins |= diff;
+               
+               if ((glong)target >= 0){
+                       if ((glong)target <= 33554431){
+                               ins = (18 << 26) | ((guint32) target) | (ins & 1) | 2;
                                *(guint32*)code = ins;
                                return;
                        }
-                       if (!is_call_imm (diff)) {
-                               handle_thunk (FALSE, code, target);
+               } else {
+                       if ((glong)target >= -33554432){
+                               ins = (18 << 26) | (((guint32)target) & ~0xfc000000) | (ins & 1) | 2;
+                               *(guint32*)code = ins;
                                return;
                        }
-                       ins = prim << 26 | (ins & 3);
-                       diff &= ~0xfc000003;
-                       ins |= diff;
                }
-               *(guint32*)code = ins;
-       } 
-#endif
+
+               handle_thunk (TRUE, code, target);
+               return;
+
+               g_assert_not_reached ();
+       }
+       
        
        if (prim == 16) {
                // absolute address
@@ -2245,6 +2397,25 @@ ppc_patch (guchar *code, guchar *target)
                        ins |= diff;
                }
                *(guint32*)code = ins;
+               return;
+       }
+
+       if (prim == 15 || ins == 0x4e800021) {
+               guint32 *seq;
+               /* the trampoline code will try to patch the blrl */
+               if (ins == 0x4e800021) {
+                       code -= 12;
+               }
+               /* this is the lis/ori/mtlr/blrl sequence */
+               seq = (guint32*)code;
+               g_assert ((seq [0] >> 26) == 15);
+               g_assert ((seq [1] >> 26) == 24);
+               g_assert ((seq [2] >> 26) == 31);
+               g_assert (seq [3] == 0x4e800021);
+               /* FIXME: make this thread safe */
+               ppc_lis (code, ppc_r0, (guint32)(target) >> 16);
+               ppc_ori (code, ppc_r0, ppc_r0, (guint32)(target) & 0xffff);
+               mono_arch_flush_icache (code - 8, 8);
        } else {
                g_assert_not_reached ();
        }
@@ -2296,8 +2467,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
        //      if (ins->cil_code)
        //              g_print ("cil code\n");
+               mono_debug_record_line_number (cfg, ins, offset);
 
                switch (ins->opcode) {
+               case OP_TLS_GET:
+                       emit_tls_access (code, ins->dreg, ins->inst_offset);
+                       break;
                case OP_BIGMUL:
                        ppc_mullw (code, ppc_r4, ins->sreg1, ins->sreg2);
                        ppc_mulhw (code, ppc_r3, ins->sreg1, ins->sreg2);
@@ -2306,34 +2481,61 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_mullw (code, ppc_r4, ins->sreg1, ins->sreg2);
                        ppc_mulhwu (code, ppc_r3, ins->sreg1, ins->sreg2);
                        break;
+               case OP_MEMORY_BARRIER:
+                       ppc_sync (code);
+                       break;
                case OP_STOREI1_MEMBASE_IMM:
-                       ppc_li (code, ppc_r11, ins->inst_imm);
-                       g_assert (ppc_is_imm16 (ins->inst_offset));
-                       ppc_stb (code, ppc_r11, ins->inst_offset, ins->inst_destbasereg);
+                       ppc_li (code, ppc_r0, ins->inst_imm);
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_stb (code, ppc_r0, ins->inst_offset, ins->inst_destbasereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_offset);
+                               ppc_stbx (code, ppc_r0, ppc_r11, ins->inst_destbasereg);
+                       }
                        break;
                case OP_STOREI2_MEMBASE_IMM:
-                       ppc_li (code, ppc_r11, ins->inst_imm);
-                       g_assert (ppc_is_imm16 (ins->inst_offset));
-                       ppc_sth (code, ppc_r11, ins->inst_offset, ins->inst_destbasereg);
+                       ppc_li (code, ppc_r0, ins->inst_imm);
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_sth (code, ppc_r0, ins->inst_offset, ins->inst_destbasereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_offset);
+                               ppc_sthx (code, ppc_r0, ppc_r11, ins->inst_destbasereg);
+                       }
                        break;
                case OP_STORE_MEMBASE_IMM:
                case OP_STOREI4_MEMBASE_IMM:
-                       ppc_load (code, ppc_r11, ins->inst_imm);
-                       g_assert (ppc_is_imm16 (ins->inst_offset));
-                       ppc_stw (code, ppc_r11, ins->inst_offset, ins->inst_destbasereg);
+                       ppc_load (code, ppc_r0, ins->inst_imm);
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_stw (code, ppc_r0, ins->inst_offset, ins->inst_destbasereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_offset);
+                               ppc_stwx (code, ppc_r0, ppc_r11, ins->inst_destbasereg);
+                       }
                        break;
                case OP_STOREI1_MEMBASE_REG:
-                       g_assert (ppc_is_imm16 (ins->inst_offset));
-                       ppc_stb (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_stb (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_offset);
+                               ppc_stbx (code, ins->sreg1, ppc_r11, ins->inst_destbasereg);
+                       }
                        break;
                case OP_STOREI2_MEMBASE_REG:
-                       g_assert (ppc_is_imm16 (ins->inst_offset));
-                       ppc_sth (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_sth (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_offset);
+                               ppc_sthx (code, ins->sreg1, ppc_r11, ins->inst_destbasereg);
+                       }
                        break;
                case OP_STORE_MEMBASE_REG:
                case OP_STOREI4_MEMBASE_REG:
-                       g_assert (ppc_is_imm16 (ins->inst_offset));
-                       ppc_stw (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_stw (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_offset);
+                               ppc_stwx (code, ins->sreg1, ppc_r11, ins->inst_destbasereg);
+                       }
                        break;
                case CEE_LDIND_I:
                case CEE_LDIND_I4:
@@ -2356,22 +2558,32 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                ppc_lwzx (code, ins->dreg, ppc_r11, ins->inst_basereg);
                        }
                        break;
-               case OP_LOADU1_MEMBASE:
-                       g_assert (ppc_is_imm16 (ins->inst_offset));
-                       ppc_lbz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
-                       break;
                case OP_LOADI1_MEMBASE:
-                       g_assert (ppc_is_imm16 (ins->inst_offset));
-                       ppc_lbz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
-                       ppc_extsb (code, ins->dreg, ins->dreg);
+               case OP_LOADU1_MEMBASE:
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_lbz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_offset);
+                               ppc_lbzx (code, ins->dreg, ppc_r11, ins->inst_basereg);
+                       }
+                       if (ins->opcode == OP_LOADI1_MEMBASE)
+                               ppc_extsb (code, ins->dreg, ins->dreg);
                        break;
                case OP_LOADU2_MEMBASE:
-                       g_assert (ppc_is_imm16 (ins->inst_offset));
-                       ppc_lhz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_lhz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_offset);
+                               ppc_lhzx (code, ins->dreg, ppc_r11, ins->inst_basereg);
+                       }
                        break;
                case OP_LOADI2_MEMBASE:
-                       g_assert (ppc_is_imm16 (ins->inst_offset));
-                       ppc_lha (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_lha (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_offset);
+                               ppc_lhax (code, ins->dreg, ppc_r11, ins->inst_basereg);
+                       }
                        break;
                case CEE_CONV_I1:
                        ppc_extsb (code, ins->dreg, ins->sreg1);
@@ -2414,9 +2626,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                }
                        }
                        break;
-               case OP_X86_TEST_NULL:
-                       ppc_cmpi (code, 0, 0, ins->sreg1, 0);
-                       break;
                case CEE_BREAK:
                        ppc_break (code);
                        break;
@@ -2429,6 +2638,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_ADC:
                        ppc_adde (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
+               case OP_ADDCC_IMM:
+                       if (ppc_is_imm16 (ins->inst_imm)) {
+                               ppc_addic (code, ins->dreg, ins->sreg1, ins->inst_imm);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_imm);
+                               ppc_addc (code, ins->dreg, ins->sreg1, ppc_r11);
+                       }
+                       break;
                case OP_ADD_IMM:
                        if (ppc_is_imm16 (ins->inst_imm)) {
                                ppc_addi (code, ins->dreg, ins->sreg1, ins->inst_imm);
@@ -2442,19 +2659,76 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_adde (code, ins->dreg, ins->sreg1, ppc_r11);
                        break;
                case CEE_ADD_OVF:
-#if 0
-                       /* clear summary overflow */
-                       ppc_crxor (code, 28, 28, 28);
-                       //ppc_break (code);
-                       ppc_addod (code, ins->dreg, ins->sreg1, ins->sreg2);
-                       //ppc_break (code);
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_SO, "OverflowException");
-#endif
-                       ppc_addc (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
+                        */
+                       ppc_addo (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
+                       break;
+               case CEE_ADD_OVF_UN:
+                       /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
+                        */
+                       ppc_addco (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<13));
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
+                       break;
+               case CEE_SUB_OVF:
+                       /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
+                        */
+                       ppc_subfo (code, ins->dreg, ins->sreg2, ins->sreg1);
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
+                       break;
+               case CEE_SUB_OVF_UN:
+                       /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
+                        */
+                       ppc_subfc (code, ins->dreg, ins->sreg2, ins->sreg1);
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<13));
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_EQ, "OverflowException");
+                       break;
+               case OP_ADD_OVF_CARRY:
+                       /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
+                        */
+                       ppc_addeo (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
+                       break;
+               case OP_ADD_OVF_UN_CARRY:
+                       /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
+                        */
+                       ppc_addeo (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<13));
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
+                       break;
+               case OP_SUB_OVF_CARRY:
+                       /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
+                        */
+                       ppc_subfeo (code, ins->dreg, ins->sreg2, ins->sreg1);
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
+                       break;
+               case OP_SUB_OVF_UN_CARRY:
+                       /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
+                        */
+                       ppc_subfeo (code, ins->dreg, ins->sreg2, ins->sreg1);
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<13));
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_EQ, "OverflowException");
                        break;
                case OP_SUBCC:
                        ppc_subfc (code, ins->dreg, ins->sreg2, ins->sreg1);
                        break;
+               case OP_SUBCC_IMM:
+                       ppc_load (code, ppc_r11, ins->inst_imm);
+                       ppc_subfc (code, ins->dreg, ppc_r11, ins->sreg1);
+                       break;
                case CEE_SUB:
                        ppc_subf (code, ins->dreg, ins->sreg2, ins->sreg1);
                        break;
@@ -2472,7 +2746,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_SBB_IMM:
                        ppc_load (code, ppc_r11, ins->inst_imm);
-                       ppc_subfe (code, ins->dreg, ins->sreg2, ppc_r11);
+                       ppc_subfe (code, ins->dreg, ppc_r11, ins->sreg1);
                        break;
                case OP_PPC_SUBFIC:
                        g_assert (ppc_is_imm16 (ins->inst_imm));
@@ -2495,51 +2769,70 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                ppc_and (code, ins->sreg1, ins->dreg, ppc_r11);
                        }
                        break;
-               case CEE_DIV:
-                       /* clear the summary overflow flag */
-                       ppc_crxor (code, 28, 28, 28);
+               case CEE_DIV: {
+                       guint32 *divisor_is_m1;
+                         /* XER format: SO, OV, CA, reserved [21 bits], count [8 bits]
+                         */
+                       ppc_cmpi (code, 0, 0, ins->sreg2, -1);
+                       divisor_is_m1 = code;
+                       ppc_bc (code, PPC_BR_FALSE | PPC_BR_LIKELY, PPC_BR_EQ, 0);
+                       ppc_lis (code, ppc_r11, 0x8000);
+                       ppc_cmp (code, 0, 0, ins->sreg1, ppc_r11);
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_EQ, "ArithmeticException");
+                       ppc_patch (divisor_is_m1, code);
+                        /* XER format: SO, OV, CA, reserved [21 bits], count [8 bits]
+                        */
                        ppc_divwod (code, ins->dreg, ins->sreg1, ins->sreg2);
-                       /* FIXME: use OverflowException for 0x80000000/-1 */
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_SO, "DivideByZeroException");
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
                        break;
+               }
                case CEE_DIV_UN:
-                       /* clear the summary overflow flag */
-                       ppc_crxor (code, 28, 28, 28);
                        ppc_divwuod (code, ins->dreg, ins->sreg1, ins->sreg2);
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_SO, "DivideByZeroException");
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
                        break;
                case OP_DIV_IMM:
+                       g_assert_not_reached ();
+#if 0
                        ppc_load (code, ppc_r11, ins->inst_imm);
-                       /* clear the summary overflow flag */
-                       ppc_crxor (code, 28, 28, 28);
                        ppc_divwod (code, ins->dreg, ins->sreg1, ppc_r11);
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_SO, "DivideByZeroException");
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
+                       /* FIXME: use OverflowException for 0x80000000/-1 */
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
                        break;
-               case CEE_REM:
-                       /* clear the summary overflow flag */
-                       ppc_crxor (code, 28, 28, 28);
+#endif
+               case CEE_REM: {
+                       guint32 *divisor_is_m1;
+                       ppc_cmpi (code, 0, 0, ins->sreg2, -1);
+                       divisor_is_m1 = code;
+                       ppc_bc (code, PPC_BR_FALSE | PPC_BR_LIKELY, PPC_BR_EQ, 0);
+                       ppc_lis (code, ppc_r11, 0x8000);
+                       ppc_cmp (code, 0, 0, ins->sreg1, ppc_r11);
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_EQ, "ArithmeticException");
+                       ppc_patch (divisor_is_m1, code);
                        ppc_divwod (code, ppc_r11, ins->sreg1, ins->sreg2);
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_SO, "DivideByZeroException");
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
+                       /* FIXME: use OverflowException for 0x80000000/-1 */
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
                        ppc_mullw (code, ppc_r11, ppc_r11, ins->sreg2);
                        ppc_subf (code, ins->dreg, ppc_r11, ins->sreg1);
                        break;
+               }
                case CEE_REM_UN:
-                       /* clear the summary overflow flag */
-                       ppc_crxor (code, 28, 28, 28);
                        ppc_divwuod (code, ppc_r11, ins->sreg1, ins->sreg2);
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_SO, "DivideByZeroException");
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
                        ppc_mullw (code, ppc_r11, ppc_r11, ins->sreg2);
                        ppc_subf (code, ins->dreg, ppc_r11, ins->sreg1);
                        break;
                case OP_REM_IMM:
-                       ppc_load (code, ppc_r11, ins->inst_imm);
-                       /* clear the summary overflow flag */
-                       ppc_crxor (code, 28, 28, 28);
-                       ppc_divwod (code, ins->dreg, ins->sreg1, ppc_r11);
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_SO, "DivideByZeroException");
-                       ppc_mullw (code, ins->dreg, ins->dreg, ppc_r11);
-                       ppc_subf (code, ins->dreg, ins->dreg, ins->sreg1);
-                       break;
+                       g_assert_not_reached ();
                case CEE_OR:
                        ppc_or (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
@@ -2584,9 +2877,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_srawi (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
                        break;
                case OP_SHR_UN_IMM:
-                       ppc_load (code, ppc_r11, ins->inst_imm);
-                       ppc_srw (code, ins->dreg, ins->sreg1, ppc_r11);
-                       //ppc_rlwinm (code, ins->dreg, ins->sreg1, (32 - (ins->inst_imm & 0xf)), (ins->inst_imm & 0xf), 31);
+                       /*ppc_load (code, ppc_r11, ins->inst_imm);
+                       ppc_srw (code, ins->dreg, ins->sreg1, ppc_r11);*/
+                       ppc_rlwinm (code, ins->dreg, ins->sreg1, (32 - (ins->inst_imm & 0x1f)), (ins->inst_imm & 0x1f), 31);
                        break;
                case CEE_SHR_UN:
                        ppc_srw (code, ins->dreg, ins->sreg1, ins->sreg2);
@@ -2601,13 +2894,21 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_mullw (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_MUL_IMM:
-                       ppc_load (code, ppc_r11, ins->inst_imm);
-                       ppc_mullw (code, ins->dreg, ins->sreg1, ppc_r11);
+                       if (ppc_is_imm16 (ins->inst_imm)) {
+                           ppc_mulli (code, ins->dreg, ins->sreg1, ins->inst_imm);
+                       } else {
+                           ppc_load (code, ppc_r11, ins->inst_imm);
+                           ppc_mullw (code, ins->dreg, ins->sreg1, ppc_r11);
+                       }
                        break;
                case CEE_MUL_OVF:
+                       /* we cannot use mcrxr, since it's not implemented on some processors
+                        * XER format: SO, OV, CA, reserved [21 bits], count [8 bits]
+                        */
                        ppc_mullwo (code, ins->dreg, ins->sreg1, ins->sreg2);
-                       ppc_mcrxr (code, 0);
-                       EMIT_COND_SYSTEM_EXCEPTION (CEE_BGT - CEE_BEQ, ins->inst_p1);
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
                        break;
                case CEE_MUL_OVF_UN:
                        /* we first multiply to get the high word and compare to 0
@@ -2616,7 +2917,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                         */
                        ppc_mulhwu (code, ppc_r0, ins->sreg1, ins->sreg2);
                        ppc_cmpi (code, 0, 0, ppc_r0, 0);
-                       EMIT_COND_SYSTEM_EXCEPTION (CEE_BNE_UN - CEE_BEQ, ins->inst_p1);
+                       EMIT_COND_SYSTEM_EXCEPTION (CEE_BNE_UN - CEE_BEQ, "OverflowException");
                        ppc_mullw (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_ICONST:
@@ -2661,17 +2962,35 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                         */
                        g_assert (!cfg->method->save_lmf);
                        if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
-                               ppc_lwz (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, cfg->frame_reg);
+                               if (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET)) {
+                                       ppc_lwz (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, cfg->frame_reg);
+                               } else {
+                                       ppc_load (code, ppc_r11, cfg->stack_usage + PPC_RET_ADDR_OFFSET);
+                                       ppc_lwzx (code, ppc_r0, cfg->frame_reg, ppc_r11);
+                               }
                                ppc_mtlr (code, ppc_r0);
                        }
-                       ppc_addic (code, ppc_sp, cfg->frame_reg, cfg->stack_usage);
+                       if (ppc_is_imm16 (cfg->stack_usage)) {
+                               ppc_addic (code, ppc_sp, cfg->frame_reg, cfg->stack_usage);
+                       } else {
+                               ppc_load (code, ppc_r11, cfg->stack_usage);
+                               ppc_add (code, ppc_sp, cfg->frame_reg, ppc_r11);
+                       }
                        if (!cfg->method->save_lmf) {
-                               for (i = 13; i < 32; ++i) {
+                               /*for (i = 31; i >= 14; --i) {
+                                       if (cfg->used_float_regs & (1 << i)) {
+                                               pos += sizeof (double);
+                                               ppc_lfd (code, i, -pos, cfg->frame_reg);
+                                       }
+                               }*/
+                               for (i = 31; i >= 13; --i) {
                                        if (cfg->used_int_regs & (1 << i)) {
-                                               pos += 4;
+                                               pos += sizeof (gulong);
                                                ppc_lwz (code, i, -pos, cfg->frame_reg);
                                        }
                                }
+                       } else {
+                               /* FIXME restore from MonoLMF: though this can't happen yet */
                        }
                        mono_add_patch_info (cfg, (guint8*) code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
                        ppc_b (code, 0);
@@ -2681,6 +3000,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        /* ensure ins->sreg1 is not NULL */
                        ppc_lwz (code, ppc_r0, 0, ins->sreg1);
                        break;
+               case OP_ARGLIST: {
+                       if (ppc_is_imm16 (cfg->sig_cookie + cfg->stack_usage)) {
+                               ppc_addi (code, ppc_r11, cfg->frame_reg, cfg->sig_cookie + cfg->stack_usage);
+                       } else {
+                               ppc_load (code, ppc_r11, cfg->sig_cookie + cfg->stack_usage);
+                               ppc_add (code, ppc_r11, cfg->frame_reg, ppc_r11);
+                       }
+                       ppc_stw (code, ppc_r11, 0, ins->sreg1);
+                       break;
+               }
                case OP_FCALL:
                case OP_LCALL:
                case OP_VCALL:
@@ -2691,7 +3020,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD, call->method);
                        else
                                mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_ABS, call->fptr);
-                       ppc_bl (code, 0);
+                       if (cfg->method->dynamic) {
+                               ppc_lis (code, ppc_r0, 0);
+                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_mtlr (code, ppc_r0);
+                               ppc_blrl (code);
+                       } else {
+                               ppc_bl (code, 0);
+                       }
                        break;
                case OP_FCALL_REG:
                case OP_LCALL_REG:
@@ -2714,14 +3050,35 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert_not_reached ();
                        break;
                case OP_LOCALLOC: {
+                       guint32 * zero_loop_jump, * zero_loop_start;
                        /* keep alignment */
-                       int alloca_waste = PPC_STACK_PARAM_OFFSET + cfg->param_area + 15;
-                       ppc_addi (code, ppc_r11, ins->sreg1, alloca_waste);
+                       int alloca_waste = PPC_STACK_PARAM_OFFSET + cfg->param_area + 31;
+                       int area_offset = alloca_waste;
+                       area_offset &= ~31;
+                       ppc_addi (code, ppc_r11, ins->sreg1, alloca_waste + 31);
                        ppc_rlwinm (code, ppc_r11, ppc_r11, 0, 0, 27);
+                       /* use ctr to hold the number of words to zero out, if needed */
+                       if (ins->flags & MONO_INST_INIT) {
+                               /* we zero 4 bytes at a time */
+                               ppc_addi (code, ppc_r0, ins->sreg1, 3);
+                               ppc_srawi (code, ppc_r0, ppc_r0, 2);
+                               ppc_mtctr (code, ppc_r0);
+                       }
                        ppc_lwz (code, ppc_r0, 0, ppc_sp);
                        ppc_neg (code, ppc_r11, ppc_r11);
-                       ppc_stwux (code, ppc_r0, ppc_r11, ppc_sp);
-                       ppc_addi (code, ins->dreg, ppc_sp, PPC_STACK_PARAM_OFFSET + cfg->param_area);
+                       ppc_stwux (code, ppc_r0, ppc_sp, ppc_r11);
+                       
+                       if (ins->flags & MONO_INST_INIT) {
+                               /* adjust the dest reg by -4 so we can use stwu */
+                               ppc_addi (code, ins->dreg, ppc_sp, (area_offset - 4));
+                               ppc_li (code, ppc_r11, 0);
+                               zero_loop_start = code;
+                               ppc_stwu (code, ppc_r11, 4, ins->dreg);
+                               zero_loop_jump = code;
+                               ppc_bc (code, PPC_BR_DEC_CTR_NONZERO, 0, 0);
+                               ppc_patch (zero_loop_jump, zero_loop_start);
+                       }
+                       ppc_addi (code, ins->dreg, ppc_sp, area_offset);
                        break;
                }
                case CEE_RET:
@@ -2732,17 +3089,49 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_mr (code, ppc_r3, ins->sreg1);
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                             (gpointer)"mono_arch_throw_exception");
-                       ppc_bl (code, 0);
+                       if (cfg->method->dynamic) {
+                               ppc_lis (code, ppc_r0, 0);
+                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_mtlr (code, ppc_r0);
+                               ppc_blrl (code);
+                       } else {
+                               ppc_bl (code, 0);
+                       }
+                       break;
+               }
+               case OP_RETHROW: {
+                       //ppc_break (code);
+                       ppc_mr (code, ppc_r3, ins->sreg1);
+                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
+                                            (gpointer)"mono_arch_rethrow_exception");
+                       if (cfg->method->dynamic) {
+                               ppc_lis (code, ppc_r0, 0);
+                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_mtlr (code, ppc_r0);
+                               ppc_blrl (code);
+                       } else {
+                               ppc_bl (code, 0);
+                       }
                        break;
                }
                case OP_START_HANDLER:
                        ppc_mflr (code, ppc_r0);
-                       ppc_stw (code, ppc_r0, ins->inst_left->inst_offset, ins->inst_left->inst_basereg);
+                       if (ppc_is_imm16 (ins->inst_left->inst_offset)) {
+                               ppc_stw (code, ppc_r0, ins->inst_left->inst_offset, ins->inst_left->inst_basereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_left->inst_offset);
+                               ppc_stwx (code, ppc_r0, ppc_r11, ins->inst_left->inst_basereg);
+                       }
                        break;
                case OP_ENDFILTER:
                        if (ins->sreg1 != ppc_r3)
                                ppc_mr (code, ppc_r3, ins->sreg1);
-                       ppc_lwz (code, ppc_r0, ins->inst_left->inst_offset, ins->inst_left->inst_basereg);
+                       if (ppc_is_imm16 (ins->inst_left->inst_offset)) {
+                               ppc_lwz (code, ppc_r0, ins->inst_left->inst_offset, ins->inst_left->inst_basereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_left->inst_offset);
+                               ppc_lwzx (code, ppc_r0, ins->inst_left->inst_basereg, ppc_r11);
+                       }
                        ppc_mtlr (code, ppc_r0);
                        ppc_blr (code);
                        break;
@@ -2814,17 +3203,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_SYSTEM_EXCEPTION (ins->opcode - OP_COND_EXC_EQ, ins->inst_p1);
                        break;
                case OP_COND_EXC_C:
-                       /* move XER [0-3] (SO, OV, CA) into CR 
-                        * this translates to LT, GT, EQ.
-                        * FIXME: test for all the conditions occourring
+                       /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
                         */
-                       ppc_mcrxr (code, 0);
-                       EMIT_COND_SYSTEM_EXCEPTION (CEE_BEQ - CEE_BEQ, ins->inst_p1);
-                       break;
+                       /*ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
+                       break;*/
                case OP_COND_EXC_OV:
-                       ppc_mcrxr (code, 0);
+                       /*ppc_mcrxr (code, 0);
                        EMIT_COND_SYSTEM_EXCEPTION (CEE_BGT - CEE_BEQ, ins->inst_p1);
-                       break;
+                       break;*/
                case OP_COND_EXC_NC:
                case OP_COND_EXC_NO:
                        g_assert_not_reached ();
@@ -2852,30 +3240,52 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_lfs (code, ins->dreg, 0, ppc_r11);
                        break;
                case OP_STORER8_MEMBASE_REG:
-                       ppc_stfd (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
-                       break;
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_stfd (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_offset);
+                               ppc_stfdx (code, ins->sreg1, ppc_r11, ins->inst_destbasereg);
+                       }
+                       break;
                case OP_LOADR8_MEMBASE:
-                       ppc_lfd (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_lfd (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_offset);
+                               ppc_lfdx (code, ins->dreg, ppc_r11, ins->inst_basereg);
+                       }
                        break;
                case OP_STORER4_MEMBASE_REG:
-                       ppc_stfs (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
+                       ppc_frsp (code, ins->sreg1, ins->sreg1);
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_stfs (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_offset);
+                               ppc_stfsx (code, ins->sreg1, ppc_r11, ins->inst_destbasereg);
+                       }
                        break;
                case OP_LOADR4_MEMBASE:
-                       ppc_lfs (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_lfs (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
+                       } else {
+                               ppc_load (code, ppc_r11, ins->inst_offset);
+                               ppc_lfsx (code, ins->dreg, ppc_r11, ins->inst_basereg);
+                       }
                        break;
                case CEE_CONV_R_UN: {
-                       static const guint64 adjust_val = 0x4330000000000000UL;
+                       static const guint64 adjust_val = 0x4330000000000000ULL;
                        ppc_addis (code, ppc_r0, ppc_r0, 0x4330);
                        ppc_stw (code, ppc_r0, -8, ppc_sp);
                        ppc_stw (code, ins->sreg1, -4, ppc_sp);
                        ppc_load (code, ppc_r11, &adjust_val);
+                       ppc_lfd (code, ins->dreg, -8, ppc_sp);
                        ppc_lfd (code, ppc_f0, 0, ppc_r11);
                        ppc_fsub (code, ins->dreg, ins->dreg, ppc_f0);
                        break;
                }
                case CEE_CONV_R4: /* FIXME: change precision */
                case CEE_CONV_R8: {
-                       static const guint64 adjust_val = 0x4330000080000000UL;
+                       static const guint64 adjust_val = 0x4330000080000000ULL;
                        // addis is special for ppc_r0
                        ppc_addis (code, ppc_r0, ppc_r0, 0x4330);
                        ppc_stw (code, ppc_r0, -8, ppc_sp);
@@ -2887,14 +3297,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_fsub (code, ins->dreg, ins->dreg, ppc_f0);
                        break;
                }
-               case OP_X86_FP_LOAD_I8:
-                       g_assert_not_reached ();
-                       /*x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);*/
-                       break;
-               case OP_X86_FP_LOAD_I4:
-                       g_assert_not_reached ();
-                       /*x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);*/
-                       break;
                case OP_FCONV_TO_I1:
                        code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, TRUE);
                        break;
@@ -2925,8 +3327,28 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        /* Implemented as helper calls */
                        break;
                case OP_LCONV_TO_OVF_I: {
-                       ppc_mr (code, ins->dreg, ins->sreg1);
-                       /* FIXME: emit exception if needed */
+                       guint32 *negative_branch, *msword_positive_branch, *msword_negative_branch, *ovf_ex_target;
+                       // Check if it's negative
+                       ppc_cmpi (code, 0, 0, ins->sreg1, 0);
+                       negative_branch = code;
+                       ppc_bc (code, PPC_BR_TRUE, PPC_BR_LT, 0);
+                       // It's positive: the most significant word must be == 0
+                       ppc_cmpi (code, 0, 0, ins->sreg2, 0);
+                       msword_positive_branch = code;
+                       ppc_bc (code, PPC_BR_TRUE, PPC_BR_EQ, 0);
+
+                       ovf_ex_target = code;
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_ALWAYS, 0, "OverflowException");
+                       // Negative
+                       ppc_patch (negative_branch, code);
+                       ppc_cmpi (code, 0, 0, ins->sreg2, -1);
+                       msword_negative_branch = code;
+                       ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
+                       ppc_patch (msword_negative_branch, ovf_ex_target);
+                       
+                       ppc_patch (msword_positive_branch, code);
+                       if (ins->dreg != ins->sreg1)
+                               ppc_mr (code, ins->dreg, ins->sreg1);
                        break;
                }
                case OP_SQRT:
@@ -3067,18 +3489,11 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
 
        for (patch_info = ji; patch_info; patch_info = patch_info->next) {
                unsigned char *ip = patch_info->ip.i + code;
-               const unsigned char *target = NULL;
+               const unsigned char *target;
+
+               target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
 
                switch (patch_info->type) {
-               case MONO_PATCH_INFO_BB:
-                       target = patch_info->data.bb->native_offset + code;
-                       break;
-               case MONO_PATCH_INFO_ABS:
-                       target = patch_info->data.target;
-                       break;
-               case MONO_PATCH_INFO_LABEL:
-                       target = patch_info->data.inst->inst_c0 + code;
-                       break;
                case MONO_PATCH_INFO_IP:
                        patch_lis_ori (ip, ip);
                        continue;
@@ -3086,45 +3501,16 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
                        g_assert_not_reached ();
                        *((gpointer *)(ip)) = code + patch_info->data.offset;
                        continue;
-               case MONO_PATCH_INFO_INTERNAL_METHOD: {
-                       MonoJitICallInfo *mi = mono_find_jit_icall_by_name (patch_info->data.name);
-                       if (!mi) {
-                               g_warning ("unknown MONO_PATCH_INFO_INTERNAL_METHOD %s", patch_info->data.name);
-                               g_assert_not_reached ();
-                       }
-                       target = mono_icall_get_wrapper (mi);
-                       break;
-               }
-               case MONO_PATCH_INFO_METHOD_JUMP: {
-                       GSList *list;
-
-                       /* get the trampoline to the method from the domain */
-                       target = mono_create_jump_trampoline (domain, patch_info->data.method, TRUE);
-                       if (!domain->jump_target_hash)
-                               domain->jump_target_hash = g_hash_table_new (NULL, NULL);
-                       list = g_hash_table_lookup (domain->jump_target_hash, patch_info->data.method);
-                       list = g_slist_prepend (list, ip);
-                       g_hash_table_insert (domain->jump_target_hash, patch_info->data.method, list);
-                       break;
-               }
-               case MONO_PATCH_INFO_METHOD:
-                       if (patch_info->data.method == method) {
-                               target = code;
-                       } else {
-                               /* get the trampoline to the method from the domain */
-                               target = mono_arch_create_jit_trampoline (patch_info->data.method);
-                       }
-                       break;
                case MONO_PATCH_INFO_SWITCH: {
-                       gpointer *table = (gpointer *)patch_info->data.target;
+                       gpointer *table = (gpointer *)patch_info->data.table->table;
                        int i;
 
                        // FIXME: inspect code to get the register
-                       ppc_load (ip, ppc_r11, patch_info->data.target);
+                       ppc_load (ip, ppc_r11, table);
                        //*((gconstpointer *)(ip + 2)) = patch_info->data.target;
 
-                       for (i = 0; i < patch_info->table_size; i++) {
-                               table [i] = (int)patch_info->data.table [i] + code;
+                       for (i = 0; i < patch_info->data.table->table_size; i++) { 
+                               table [i] = (int)patch_info->data.table->table [i] + code;
                        }
                        /* we put into the table the absolute address, no need for ppc_patch in this case */
                        continue;
@@ -3133,127 +3519,51 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
                case MONO_PATCH_INFO_CLASS:
                case MONO_PATCH_INFO_IMAGE:
                case MONO_PATCH_INFO_FIELD:
+               case MONO_PATCH_INFO_VTABLE:
+               case MONO_PATCH_INFO_IID:
+               case MONO_PATCH_INFO_SFLDA:
+               case MONO_PATCH_INFO_LDSTR:
+               case MONO_PATCH_INFO_TYPE_FROM_HANDLE:
+               case MONO_PATCH_INFO_LDTOKEN:
                        /* from OP_AOTCONST : lis + ori */
-                       patch_lis_ori (ip, patch_info->data.target);
+                       patch_lis_ori (ip, target);
                        continue;
                case MONO_PATCH_INFO_R4:
                case MONO_PATCH_INFO_R8:
                        g_assert_not_reached ();
                        *((gconstpointer *)(ip + 2)) = patch_info->data.target;
                        continue;
-               case MONO_PATCH_INFO_IID:
-                       mono_class_init (patch_info->data.klass);
-                       patch_lis_ori (ip, patch_info->data.klass->interface_id);
-                       continue;                       
-               case MONO_PATCH_INFO_VTABLE:
-                       target = mono_class_vtable (domain, patch_info->data.klass);
-                       patch_lis_ori (ip, target);
-                       continue;
-               case MONO_PATCH_INFO_CLASS_INIT:
-                       target = mono_create_class_init_trampoline (mono_class_vtable (domain, patch_info->data.klass));
-                       break;
-               case MONO_PATCH_INFO_SFLDA: {
-                       MonoVTable *vtable = mono_class_vtable (domain, patch_info->data.field->parent);
-                       if (!vtable->initialized && !(vtable->klass->flags & TYPE_ATTRIBUTE_BEFORE_FIELD_INIT) && mono_class_needs_cctor_run (vtable->klass, method))
-                               /* Done by the generated code */
-                               ;
-                       else {
-                               if (run_cctors)
-                                       mono_runtime_class_init (vtable);
-                       }
-                       target = (char*)vtable->data + patch_info->data.field->offset;
-                       patch_lis_ori (ip, target);
-                       continue;
-               }
                case MONO_PATCH_INFO_EXC_NAME:
                        g_assert_not_reached ();
                        *((gconstpointer *)(ip + 1)) = patch_info->data.name;
                        continue;
-               case MONO_PATCH_INFO_LDSTR:
-                       target = mono_ldstr (domain, patch_info->data.token->image, 
-                                                       mono_metadata_token_index (patch_info->data.token->token));
-                       patch_lis_ori (ip, target);
-                       continue;
-               case MONO_PATCH_INFO_TYPE_FROM_HANDLE: {
-                       gpointer handle;
-                       MonoClass *handle_class;
-
-                       handle = mono_ldtoken (patch_info->data.token->image, 
-                                                                  patch_info->data.token->token, &handle_class, NULL);
-                       mono_class_init (handle_class);
-                       mono_class_init (mono_class_from_mono_type (handle));
-
-                       patch_lis_ori (ip, handle);
-                       continue;
-               }
-               case MONO_PATCH_INFO_LDTOKEN: {
-                       gpointer handle;
-                       MonoClass *handle_class;
-
-                       handle = mono_ldtoken (patch_info->data.token->image,
-                                                                  patch_info->data.token->token, &handle_class, NULL);
-                       mono_class_init (handle_class);
-
-                       patch_lis_ori (ip, handle);
-                       continue;
-               }
+               case MONO_PATCH_INFO_NONE:
                case MONO_PATCH_INFO_BB_OVF:
                case MONO_PATCH_INFO_EXC_OVF:
                        /* everything is dealt with at epilog output time */
                        continue;
                default:
-                       g_assert_not_reached ();
+                       break;
                }
                ppc_patch (ip, target);
        }
 }
 
-int
-mono_arch_max_epilog_size (MonoCompile *cfg)
-{
-       int exc_count = 0, max_epilog_size = 16 + 20*4;
-       MonoJumpInfo *patch_info;
-       
-       if (cfg->method->save_lmf)
-               max_epilog_size += 128;
-       
-       if (mono_jit_trace_calls != NULL)
-               max_epilog_size += 50;
-
-       if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
-               max_epilog_size += 50;
-
-       /* count the number of exception infos */
-     
-       /* 
-        * make sure we have enough space for exceptions
-        * 24 is the simulated call to throw_exception_by_name
-        */
-       for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
-               if (patch_info->type == MONO_PATCH_INFO_EXC)
-                       max_epilog_size += 24;
-               else if (patch_info->type == MONO_PATCH_INFO_BB_OVF)
-                       max_epilog_size += 12;
-               else if (patch_info->type == MONO_PATCH_INFO_EXC_OVF)
-                       max_epilog_size += 12;
-       }
-
-       return max_epilog_size;
-}
-
 /*
  * Stack frame layout:
  * 
  *   ------------------- sp
- *     optional 8 bytes for tracing (later use a proper local variable?)
- *   -------------------
  *     MonoLMF structure or saved registers
  *   -------------------
+ *     spilled regs
+ *   -------------------
  *     locals
  *   -------------------
- *     param area
+ *     optional 8 bytes for tracing
+ *   -------------------
+ *     param area             size is cfg->param_area
  *   -------------------
- *     linkage area
+ *     linkage area           size is PPC_STACK_PARAM_OFFSET
  *   ------------------- sp
  *     red zone
  */
@@ -3273,37 +3583,33 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
                tracing = 1;
 
-       cfg->code_size = 256;
+       sig = mono_method_signature (method);
+       cfg->code_size = 256 + sig->param_count * 20;
        code = cfg->native_code = g_malloc (cfg->code_size);
 
        if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
                ppc_mflr (code, ppc_r0);
                ppc_stw (code, ppc_r0, PPC_RET_ADDR_OFFSET, ppc_sp);
        }
-       if (cfg->flags & MONO_CFG_HAS_ALLOCA) {
-               cfg->used_int_regs |= 1 << 31;
-       }
-       cfg->used_int_regs |= USE_EXTRA_TEMPS;
+       if (cfg->max_ireg >= 29)
+               cfg->used_int_regs |= USE_EXTRA_TEMPS;
 
        alloc_size = cfg->stack_offset;
        pos = 0;
-       /* reserve room to save return value */
-       if (tracing)
-               pos += 8;
 
        if (!method->save_lmf) {
-               for (i = 13; i < 32; ++i) {
-                       if (cfg->used_int_regs & (1 << i)) {
-                               pos += sizeof (gulong);
-                               ppc_stw (code, i, -pos, ppc_sp);
-                       }
-               }
-               /*for (i = 14; i < 32; ++i) {
+               /*for (i = 31; i >= 14; --i) {
                        if (cfg->used_float_regs & (1 << i)) {
                                pos += sizeof (gdouble);
                                ppc_stfd (code, i, -pos, ppc_sp);
                        }
                }*/
+               for (i = 31; i >= 13; --i) {
+                       if (cfg->used_int_regs & (1 << i)) {
+                               pos += sizeof (gulong);
+                               ppc_stw (code, i, -pos, ppc_sp);
+                       }
+               }
        } else {
                int ofs;
                pos += sizeof (MonoLMF);
@@ -3322,12 +3628,17 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        }
 
        cfg->stack_usage = alloc_size;
-       g_assert (ppc_is_imm16 (-alloc_size));
        g_assert ((alloc_size & (PPC_STACK_ALIGNMENT-1)) == 0);
-       if (alloc_size)
-               ppc_stwu (code, ppc_sp, -alloc_size, ppc_sp);
-       if (cfg->flags & MONO_CFG_HAS_ALLOCA)
-               ppc_mr (code, ppc_r31, ppc_sp);
+       if (alloc_size) {
+               if (ppc_is_imm16 (-alloc_size)) {
+                       ppc_stwu (code, ppc_sp, -alloc_size, ppc_sp);
+               } else {
+                       ppc_load (code, ppc_r11, -alloc_size);
+                       ppc_stwux (code, ppc_sp, ppc_sp, ppc_r11);
+               }
+       }
+       if (cfg->frame_reg != ppc_sp)
+               ppc_mr (code, cfg->frame_reg, ppc_sp);
 
         /* compute max_offset in order to use short forward jumps
         * we always do it on ppc because the immediate displacement
@@ -3348,7 +3659,6 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        }
 
        /* load arguments allocated to register from the stack */
-       sig = method->signature;
        pos = 0;
 
        cinfo = calculate_sizes (sig, sig->pinvoke);
@@ -3356,12 +3666,19 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        if (MONO_TYPE_ISSTRUCT (sig->ret)) {
                ArgInfo *ainfo = &cinfo->ret;
                inst = cfg->ret;
-               ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+               if (ppc_is_imm16 (inst->inst_offset)) {
+                       ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+               } else {
+                       ppc_load (code, ppc_r11, inst->inst_offset);
+                       ppc_stwx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
+               }
        }
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ArgInfo *ainfo = cinfo->args + i;
                inst = cfg->varinfo [pos];
                
+               if (cfg->verbose_level > 2)
+                       g_print ("Saving argument %d (type: %d)\n", i, ainfo->regtype);
                if (inst->opcode == OP_REGVAR) {
                        if (ainfo->regtype == RegTypeGeneral)
                                ppc_mr (code, inst->dreg, ainfo->reg);
@@ -3380,17 +3697,40 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        if (ainfo->regtype == RegTypeGeneral) {
                                switch (ainfo->size) {
                                case 1:
-                                       ppc_stb (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       if (ppc_is_imm16 (inst->inst_offset)) {
+                                               ppc_stb (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       } else {
+                                               ppc_load (code, ppc_r11, inst->inst_offset);
+                                               ppc_stbx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
+                                       }
                                        break;
                                case 2:
-                                       ppc_sth (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       if (ppc_is_imm16 (inst->inst_offset)) {
+                                               ppc_sth (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       } else {
+                                               ppc_load (code, ppc_r11, inst->inst_offset);
+                                               ppc_sthx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
+                                       }
                                        break;
                                case 8:
-                                       ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
-                                       ppc_stw (code, ainfo->reg + 1, inst->inst_offset + 4, inst->inst_basereg);
+                                       if (ppc_is_imm16 (inst->inst_offset + 4)) {
+                                               ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                               ppc_stw (code, ainfo->reg + 1, inst->inst_offset + 4, inst->inst_basereg);
+                                       } else {
+                                               ppc_load (code, ppc_r11, inst->inst_offset);
+                                               ppc_add (code, ppc_r11, ppc_r11, inst->inst_basereg);
+                                               ppc_stw (code, ainfo->reg, 0, ppc_r11);
+                                               ppc_stw (code, ainfo->reg + 1, 4, ppc_r11);
+                                       }
                                        break;
                                default:
-                                       ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       if (ppc_is_imm16 (inst->inst_offset)) {
+                                               ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       } else {
+                                               ppc_load (code, ppc_r11, inst->inst_offset);
+                                               ppc_stwx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
+                                       }
+                                       break;
                                }
                        } else if (ainfo->regtype == RegTypeBase) {
                                /* load the previous stack pointer in r11 */
@@ -3398,20 +3738,42 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                ppc_lwz (code, ppc_r0, ainfo->offset, ppc_r11);
                                switch (ainfo->size) {
                                case 1:
-                                       ppc_stb (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
+                                       if (ppc_is_imm16 (inst->inst_offset)) {
+                                               ppc_stb (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
+                                       } else {
+                                               ppc_load (code, ppc_r11, inst->inst_offset);
+                                               ppc_stbx (code, ppc_r0, ppc_r11, inst->inst_basereg);
+                                       }
                                        break;
                                case 2:
-                                       ppc_sth (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
+                                       if (ppc_is_imm16 (inst->inst_offset)) {
+                                               ppc_sth (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
+                                       } else {
+                                               ppc_load (code, ppc_r11, inst->inst_offset);
+                                               ppc_sthx (code, ppc_r0, ppc_r11, inst->inst_basereg);
+                                       }
                                        break;
                                case 8:
-                                       ppc_stw (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
-                                       ppc_lwz (code, ppc_r0, ainfo->offset + 4, ppc_r11);
-                                       ppc_stw (code, ppc_r0, inst->inst_offset + 4, inst->inst_basereg);
+                                       if (ppc_is_imm16 (inst->inst_offset + 4)) {
+                                               ppc_stw (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
+                                               ppc_lwz (code, ppc_r0, ainfo->offset + 4, ppc_r11);
+                                               ppc_stw (code, ppc_r0, inst->inst_offset + 4, inst->inst_basereg);
+                                       } else {
+                                               /* FIXME */
+                                               g_assert_not_reached ();
+                                       }
                                        break;
                                default:
-                                       ppc_stw (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
+                                       if (ppc_is_imm16 (inst->inst_offset)) {
+                                               ppc_stw (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
+                                       } else {
+                                               ppc_load (code, ppc_r11, inst->inst_offset);
+                                               ppc_stwx (code, ppc_r0, ppc_r11, inst->inst_basereg);
+                                       }
+                                       break;
                                }
                        } else if (ainfo->regtype == RegTypeFP) {
+                               g_assert (ppc_is_imm16 (inst->inst_offset));
                                if (ainfo->size == 8)
                                        ppc_stfd (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                                else if (ainfo->size == 4)
@@ -3422,8 +3784,24 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                int doffset = inst->inst_offset;
                                int soffset = 0;
                                int cur_reg;
+                               int size = 0;
+                               g_assert (ppc_is_imm16 (inst->inst_offset));
+                               g_assert (ppc_is_imm16 (inst->inst_offset + ainfo->size * sizeof (gpointer)));
+                               if (mono_class_from_mono_type (inst->inst_vtype))
+                                       size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), NULL);
                                for (cur_reg = 0; cur_reg < ainfo->size; ++cur_reg) {
-                                       ppc_stw (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
+/*
+Darwin handles 1 and 2 byte structs specially by loading h/b into the arg
+register.  Should this case include linux/ppc?
+*/
+#if __APPLE__
+                                       if (size == 2)
+                                               ppc_sth (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
+                                       else if (size == 1)
+                                               ppc_stb (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
+                                       else 
+#endif
+                                               ppc_stw (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
                                        soffset += sizeof (gpointer);
                                        doffset += sizeof (gpointer);
                                }
@@ -3434,6 +3812,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        code = emit_memcpy (code, ainfo->vtsize * sizeof (gpointer), inst->inst_basereg, doffset, ppc_r11, ainfo->offset + soffset);
                                }
                        } else if (ainfo->regtype == RegTypeStructByAddr) {
+                               g_assert (ppc_is_imm16 (inst->inst_offset));
                                /* FIXME: handle overrun! with struct sizes not multiple of 4 */
                                code = emit_memcpy (code, ainfo->vtsize * sizeof (gpointer), inst->inst_basereg, inst->inst_offset, ainfo->reg, 0);
                        } else
@@ -3442,18 +3821,38 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                pos++;
        }
 
-       if (method->save_lmf) {
+       if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
+               ppc_load (code, ppc_r3, cfg->domain);
+               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
+               ppc_bl (code, 0);
+       }
 
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
+       if (method->save_lmf) {
+               if (lmf_pthread_key != -1) {
+                       emit_tls_access (code, ppc_r3, lmf_pthread_key);
+                       if (G_STRUCT_OFFSET (MonoJitTlsData, lmf))
+                               ppc_addi (code, ppc_r3, ppc_r3, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
+               } else {
+                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                     (gpointer)"mono_get_lmf_addr");
-               ppc_bl (code, 0);
+                       if (cfg->method->dynamic) {
+                               ppc_lis (code, ppc_r0, 0);
+                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_mtlr (code, ppc_r0);
+                               ppc_blrl (code);
+                       } else {
+                               ppc_bl (code, 0);
+                       }
+               }
                /* we build the MonoLMF structure on the stack - see mini-ppc.h */
                /* lmf_offset is the offset from the previous stack pointer,
                 * alloc_size is the total stack space allocated, so the offset
                 * of MonoLMF from the current stack ptr is alloc_size - lmf_offset.
-                * The pointer to the struct is put in ppc_r11.
+                * The pointer to the struct is put in ppc_r11 (new_lmf).
+                * The callee-saved registers are already in the MonoLMF structure
                 */
                ppc_addi (code, ppc_r11, ppc_sp, alloc_size - lmf_offset);
+               /* ppc_r3 is the result from mono_get_lmf_addr () */
                ppc_stw (code, ppc_r3, G_STRUCT_OFFSET(MonoLMF, lmf_addr), ppc_r11);
                /* new_lmf->previous_lmf = *lmf_addr */
                ppc_lwz (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r3);
@@ -3474,6 +3873,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
 
        cfg->code_len = code - cfg->native_code;
+       g_assert (cfg->code_len < cfg->code_size);
        g_free (cinfo);
 
        return code;
@@ -3485,8 +3885,24 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        MonoJumpInfo *patch_info;
        MonoMethod *method = cfg->method;
        int pos, i;
+       int max_epilog_size = 16 + 20*4;
        guint8 *code;
 
+       if (cfg->method->save_lmf)
+               max_epilog_size += 128;
+       
+       if (mono_jit_trace_calls != NULL)
+               max_epilog_size += 50;
+
+       if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
+               max_epilog_size += 50;
+
+       while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
+               cfg->code_size *= 2;
+               cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+               mono_jit_stats.code_reallocs++;
+       }
+
        /*
         * Keep in sync with CEE_JMP
         */
@@ -3494,10 +3910,8 @@ mono_arch_emit_epilog (MonoCompile *cfg)
 
        if (mono_jit_trace_calls != NULL && mono_trace_eval (method)) {
                code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
-               pos = 8;
-       } else {
-               pos = 0;
        }
+       pos = 0;
 
        if (method->save_lmf) {
                int lmf_offset;
@@ -3512,12 +3926,16 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                ppc_lwz (code, ppc_r6, G_STRUCT_OFFSET(MonoLMF, lmf_addr), ppc_r11);
                /* *(lmf_addr) = previous_lmf */
                ppc_stw (code, ppc_r5, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r6);
+               /* FIXME: speedup: there is no actual need to restore the registers if
+                * we didn't actually change them (idea from Zoltan).
+                */
                /* restore iregs */
                ppc_lmw (code, ppc_r13, ppc_r11, G_STRUCT_OFFSET(MonoLMF, iregs));
                /* restore fregs */
-               for (i = 14; i < 32; i++) {
+               /*for (i = 14; i < 32; i++) {
                        ppc_lfd (code, i, G_STRUCT_OFFSET(MonoLMF, fregs) + ((i-14) * sizeof (gdouble)), ppc_r11);
-               }
+               }*/
+               g_assert (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET));
                /* use the saved copy of the frame reg in r8 */
                if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
                        ppc_lwz (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, ppc_r8);
@@ -3526,20 +3944,110 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                ppc_addic (code, ppc_sp, ppc_r8, cfg->stack_usage);
        } else {
                if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
-                       ppc_lwz (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, cfg->frame_reg);
+                       if (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET)) {
+                               ppc_lwz (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, cfg->frame_reg);
+                       } else {
+                               ppc_load (code, ppc_r11, cfg->stack_usage + PPC_RET_ADDR_OFFSET);
+                               ppc_lwzx (code, ppc_r0, cfg->frame_reg, ppc_r11);
+                       }
                        ppc_mtlr (code, ppc_r0);
                }
-               ppc_addic (code, ppc_sp, cfg->frame_reg, cfg->stack_usage);
+               if (ppc_is_imm16 (cfg->stack_usage)) {
+                       ppc_addic (code, ppc_sp, cfg->frame_reg, cfg->stack_usage);
+               } else {
+                       ppc_load (code, ppc_r11, cfg->stack_usage);
+                       ppc_add (code, ppc_sp, cfg->frame_reg, ppc_r11);
+               }
 
-               for (i = 13; i < 32; ++i) {
+               /*for (i = 31; i >= 14; --i) {
+                       if (cfg->used_float_regs & (1 << i)) {
+                               pos += sizeof (double);
+                               ppc_lfd (code, i, -pos, ppc_sp);
+                       }
+               }*/
+               for (i = 31; i >= 13; --i) {
                        if (cfg->used_int_regs & (1 << i)) {
-                               pos += 4;
-                               ppc_lwz (code, i, -pos, cfg->frame_reg);
+                               pos += sizeof (gulong);
+                               ppc_lwz (code, i, -pos, ppc_sp);
                        }
                }
        }
        ppc_blr (code);
 
+       cfg->code_len = code - cfg->native_code;
+
+       g_assert (cfg->code_len < cfg->code_size);
+
+}
+
+/*
+ * exception_id_by_name:
+ * Translate the class name of an intrinsic exception into its MONO_EXC_* id.
+ * A name that is not in the table is reported through g_error ().
+ * Remove once throw_exception_by_name is eliminated.
+ */
+static int
+exception_id_by_name (const char *name)
+{
+       static const struct {
+               const char *exc_name;
+               int exc_id;
+       } intrinsic_excs [] = {
+               { "IndexOutOfRangeException", MONO_EXC_INDEX_OUT_OF_RANGE },
+               { "OverflowException", MONO_EXC_OVERFLOW },
+               { "ArithmeticException", MONO_EXC_ARITHMETIC },
+               { "DivideByZeroException", MONO_EXC_DIVIDE_BY_ZERO },
+               { "InvalidCastException", MONO_EXC_INVALID_CAST },
+               { "NullReferenceException", MONO_EXC_NULL_REF },
+               { "ArrayTypeMismatchException", MONO_EXC_ARRAY_TYPE_MISMATCH }
+       };
+       unsigned int n;
+
+       for (n = 0; n < sizeof (intrinsic_excs) / sizeof (intrinsic_excs [0]); ++n) {
+               if (strcmp (name, intrinsic_excs [n].exc_name) == 0)
+                       return intrinsic_excs [n].exc_id;
+       }
+       g_error ("Unknown intrinsic exception %s\n", name);
+       return 0;
+}
+
+void
+mono_arch_emit_exceptions (MonoCompile *cfg)
+{
+       MonoJumpInfo *patch_info;
+       int nthrows, i;
+       guint8 *code;
+       const guint8* exc_throw_pos [MONO_EXC_INTRINS_NUM] = {NULL};
+       guint8 exc_throw_found [MONO_EXC_INTRINS_NUM] = {0};
+       guint32 code_size;
+       int exc_count = 0;
+       int max_epilog_size = 50;
+
+       /* count the number of exception infos */
+     
+       /* 
+        * make sure we have enough space for exceptions
+        * 24 is the simulated call to throw_exception_by_name
+        */
+       for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
+               if (patch_info->type == MONO_PATCH_INFO_EXC) {
+                       i = exception_id_by_name (patch_info->data.target);
+                       if (!exc_throw_found [i]) {
+                               max_epilog_size += 12;
+                               exc_throw_found [i] = TRUE;
+                       }
+               } else if (patch_info->type == MONO_PATCH_INFO_BB_OVF)
+                       max_epilog_size += 12;
+               else if (patch_info->type == MONO_PATCH_INFO_EXC_OVF) {
+                       MonoOvfJump *ovfj = patch_info->data.target;
+                       i = exception_id_by_name (ovfj->data.exception);
+                       if (!exc_throw_found [i]) {
+                               max_epilog_size += 12;
+                               exc_throw_found [i] = TRUE;
+                       }
+                       max_epilog_size += 8;
+               }
+       }
+
+       while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
+               cfg->code_size *= 2;
+               cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+               mono_jit_stats.code_reallocs++;
+       }
+
+       code = cfg->native_code + cfg->code_len;
+
        /* add code to raise exceptions */
        for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
                switch (patch_info->type) {
@@ -3553,32 +4061,44 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                        ppc_patch (code - 4, ip + 4); /* jump back after the initiali branch */
                        /* jump back to the true target */
                        ppc_b (code, 0);
-                       ip = ovfj->bb->native_offset + cfg->native_code;
+                       ip = ovfj->data.bb->native_offset + cfg->native_code;
                        ppc_patch (code - 4, ip);
                        break;
                }
                case MONO_PATCH_INFO_EXC_OVF: {
                        MonoOvfJump *ovfj = patch_info->data.target;
+                       MonoJumpInfo *newji;
                        unsigned char *ip = patch_info->ip.i + cfg->native_code;
-                       /* patch the initial jump */
+                       unsigned char *bcl = code;
+                       /* patch the initial jump: we arrived here with a call */
                        ppc_patch (ip, code);
-                       ppc_bc (code, ovfj->b0_cond, ovfj->b1_cond, 2);
+                       ppc_bc (code, ovfj->b0_cond, ovfj->b1_cond, 0);
                        ppc_b (code, 0);
                        ppc_patch (code - 4, ip + 4); /* jump back after the initiali branch */
-                       /* jump back to the true target */
-                       ppc_b (code, 0);
-                       ip = (char*)ovfj->ip + 4;
-                       ppc_patch (code - 4, ip);
+                       /* patch the conditional jump to the right handler */
+                       /* make it processed next */
+                       newji = mono_mempool_alloc (cfg->mempool, sizeof (MonoJumpInfo));
+                       newji->type = MONO_PATCH_INFO_EXC;
+                       newji->ip.i = bcl - cfg->native_code;
+                       newji->data.target = ovfj->data.exception;
+                       newji->next = patch_info->next;
+                       patch_info->next = newji;
                        break;
                }
                case MONO_PATCH_INFO_EXC: {
                        unsigned char *ip = patch_info->ip.i + cfg->native_code;
+                       i = exception_id_by_name (patch_info->data.target);
+                       if (exc_throw_pos [i]) {
+                               ppc_patch (ip, exc_throw_pos [i]);
+                               patch_info->type = MONO_PATCH_INFO_NONE;
+                               break;
+                       } else {
+                               exc_throw_pos [i] = code;
+                       }
                        ppc_patch (ip, code);
                        /*mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);*/
                        ppc_load (code, ppc_r3, patch_info->data.target);
-                       /* simulate a call from ip */
-                       ppc_load (code, ppc_r0, ip + 4);
-                       ppc_mtlr (code, ppc_r0);
+                       /* we got here from a conditional call, so the calling ip is set in lr already */
                        patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
                        patch_info->data.name = "mono_arch_throw_exception_by_name";
                        patch_info->ip.i = code - cfg->native_code;
@@ -3597,9 +4117,145 @@ mono_arch_emit_epilog (MonoCompile *cfg)
 
 }
 
+/*
+ * try_offset_access:
+ * Check that the slot for pthread key idx, reached through the same
+ * two-level lookup that emit_linuxthreads_tls generates (first-level table
+ * at offset 284 from r2, 32 slots per second-level table), holds value.
+ * Returns 1 on a match, 0 when the second-level table is missing or the
+ * slot holds something else.
+ */
+static int
+try_offset_access (void *value, guint32 idx)
+{
+       register void* r2_base __asm__ ("r2");
+       void ***first_level = (void***)((char*)r2_base + 284);
+       void **second_level = first_level [idx / 32];
+
+       return second_level && second_level [idx % 32] == value;
+}
+
+/*
+ * setup_tls_access:
+ * Choose the inline TLS access strategy (tls_mode) and cache the pthread
+ * keys for the domain, LMF and thread slots in monodomain_key,
+ * lmf_pthread_key and monothread_key.
+ *
+ * While tls_mode is TLS_MODE_DETECT the machine code of
+ * pthread_getspecific () is compared against known instruction patterns;
+ * any mismatch sets TLS_MODE_FAILED, after which later calls return
+ * immediately.  Setting the MONO_NO_TLS environment variable also forces
+ * TLS_MODE_FAILED.  Keys that are still -1 are looked up again on each call.
+ */
+static void
+setup_tls_access (void)
+{
+       guint32 ptk;
+       guint32 *ins, *code;
+       guint32 cmplwi_1023, li_0x48, blr_ins;
+       if (tls_mode == TLS_MODE_FAILED)
+               return;
+
+       if (g_getenv ("MONO_NO_TLS")) {
+               tls_mode = TLS_MODE_FAILED;
+               return;
+       }
+
+       if (tls_mode == TLS_MODE_DETECT) {
+               ins = (guint32*)pthread_getspecific;
+               /* uncond branch to the real method */
+               if ((*ins >> 26) == 18) {
+                       gint32 val;
+                       /* clear the two low flag bits, then shift the opcode out and
+                        * back so the branch displacement ends up sign-extended */
+                       val = (*ins & ~3) << 6;
+                       val >>= 6;
+                       if (*ins & 2) {
+                               /* absolute */
+                               ins = (guint32*)val;
+                       } else {
+                               ins = (guint32*) ((char*)ins + val);
+                       }
+               }
+               /* assemble the reference instructions into locals so they can be
+                * compared word by word with the library code */
+               code = &cmplwi_1023;
+               ppc_cmpli (code, 0, 0, ppc_r3, 1023);
+               code = &li_0x48;
+               ppc_li (code, ppc_r4, 0x48);
+               code = &blr_ins;
+               ppc_blr (code);
+               if (*ins == cmplwi_1023) {
+                       /* TLS_MODE_LTHREADS pattern: within the next 20 instructions,
+                        * and before a zero word or a blr, there must be an opcode-32
+                        * load using displacement 284 */
+                       int found_lwz_284 = 0;
+                       for (ptk = 0; ptk < 20; ++ptk) {
+                               ++ins;
+                               if (!*ins || *ins == blr_ins)
+                                       break;
+                               if ((guint16)*ins == 284 && (*ins >> 26) == 32) {
+                                       found_lwz_284 = 1;
+                                       break;
+                               }
+                       }
+                       if (!found_lwz_284) {
+                               tls_mode = TLS_MODE_FAILED;
+                               return;
+                       }
+                       tls_mode = TLS_MODE_LTHREADS;
+               } else if (*ins == li_0x48) {
+                       /* Darwin pattern: li r4, 0x48 followed by a branch to the
+                        * code that actually fetches the value */
+                       ++ins;
+                       /* uncond branch to the real method */
+                       if ((*ins >> 26) == 18) {
+                               gint32 val;
+                               val = (*ins & ~3) << 6;
+                               val >>= 6;
+                               if (*ins & 2) {
+                                       /* absolute */
+                                       ins = (guint32*)val;
+                               } else {
+                                       ins = (guint32*) ((char*)ins + val);
+                               }
+                               /* val is reused as scratch space for the instruction to
+                                * match: first li r0, 0x7FF2 as in emit_darwing4_tls */
+                               code = &val;
+                               ppc_li (code, ppc_r0, 0x7FF2);
+                               if (ins [1] == val) {
+                                       /* Darwin on G4, implement */
+                                       tls_mode = TLS_MODE_FAILED;
+                                       return;
+                               } else {
+                                       /* otherwise expect mfspr r3, 104, the access that
+                                        * emit_darwing5_tls emits */
+                                       code = &val;
+                                       ppc_mfspr (code, ppc_r3, 104);
+                                       if (ins [1] != val) {
+                                               tls_mode = TLS_MODE_FAILED;
+                                               return;
+                                       }
+                                       tls_mode = TLS_MODE_DARWIN_G5;
+                               }
+                       } else {
+                               tls_mode = TLS_MODE_FAILED;
+                               return;
+                       }
+               } else {
+                       tls_mode = TLS_MODE_FAILED;
+                       return;
+               }
+       }
+       /* each key is cached only when the values involved are below 1024 */
+       if (monodomain_key == -1) {
+               ptk = mono_domain_get_tls_key ();
+               if (ptk < 1024) {
+                       ptk = mono_pthread_key_for_tls (ptk);
+                       if (ptk < 1024) {
+                               monodomain_key = ptk;
+                       }
+               }
+       }
+       if (lmf_pthread_key == -1) {
+               ptk = mono_pthread_key_for_tls (mono_jit_tls_id);
+               if (ptk < 1024) {
+                       /*g_print ("MonoLMF at: %d\n", ptk);*/
+                       /*if (!try_offset_access (mono_get_lmf_addr (), ptk)) {
+                               init_tls_failed = 1;
+                               return;
+                       }*/
+                       lmf_pthread_key = ptk;
+               }
+       }
+       if (monothread_key == -1) {
+               ptk = mono_thread_get_tls_key ();
+               if (ptk < 1024) {
+                       ptk = mono_pthread_key_for_tls (ptk);
+                       if (ptk < 1024) {
+                               monothread_key = ptk;
+                               /*g_print ("thread inited: %d\n", ptk);*/
+                       }
+               } else {
+                       /*g_print ("thread not inited yet %d\n", ptk);*/
+               }
+       }
+}
+
+/* Runs setup_tls_access (); the tls argument itself is not used here. */
 void
 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
 {
+       setup_tls_access ();
 }
 
 void
@@ -3635,20 +4291,54 @@ mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_re
        }
 }
 
-gint
-mono_arch_get_opcode_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
+/*
+ * mono_arch_get_inst_for_method:
+ * Return a new OP_MEMORY_BARRIER instruction when cmethod is the method
+ * named "MemoryBarrier" on mono_defaults.thread_class, NULL for every other
+ * method.  The Math.Sqrt case is kept commented out.
+ */
+MonoInst*
+mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
 {
-       /* optional instruction, need to detect it
-       if (cmethod->klass == mono_defaults.math_class) {
-               if (strcmp (cmethod->name, "Sqrt") == 0)
-                       return OP_SQRT;
+       MonoInst *ins = NULL;
+
+       if (cmethod->klass == mono_defaults.thread_class &&
+                       strcmp (cmethod->name, "MemoryBarrier") == 0) {
+               MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
+       }
+       /*if (cmethod->klass == mono_defaults.math_class) {
+               if (strcmp (cmethod->name, "Sqrt") == 0) {
+                       MONO_INST_NEW (cfg, ins, OP_SQRT);
+                       ins->inst_i0 = args [0];
+               }
        }*/
-       return -1;
+       return ins;
 }
 
-
+/* Always returns 0; tree and arity are not used. */
 gboolean
 mono_arch_print_tree (MonoInst *tree, int arity)
 {
        return 0;
 }
+
+/*
+ * mono_arch_get_domain_intrinsic:
+ * Build an OP_TLS_GET instruction whose inst_offset is the cached
+ * monodomain_key.  Returns NULL when setup_tls_access () left that key
+ * unset (-1).
+ */
+MonoInst*
+mono_arch_get_domain_intrinsic (MonoCompile* cfg)
+{
+       MonoInst *tls_get = NULL;
+
+       setup_tls_access ();
+       if (monodomain_key != -1) {
+               MONO_INST_NEW (cfg, tls_get, OP_TLS_GET);
+               tls_get->inst_offset = monodomain_key;
+       }
+       return tls_get;
+}
+
+/*
+ * mono_arch_get_thread_intrinsic:
+ * Build an OP_TLS_GET instruction whose inst_offset is the cached
+ * monothread_key.  Returns NULL when setup_tls_access () left that key
+ * unset (-1).
+ */
+MonoInst*
+mono_arch_get_thread_intrinsic (MonoCompile* cfg)
+{
+       MonoInst *tls_get = NULL;
+
+       setup_tls_access ();
+       if (monothread_key != -1) {
+               MONO_INST_NEW (cfg, tls_get, OP_TLS_GET);
+               tls_get->inst_offset = monothread_key;
+       }
+       return tls_get;
+}
+