2008-09-27 Mark Probst <mark.probst@gmail.com>
[mono.git] / mono / mini / mini-ppc.c
index 9f6e1aef3f1324ef1341d664d28a10ba88ced9a1..bed880b6b605dd1541958e66109b97e640c957a6 100644 (file)
 
 #include "mini-ppc.h"
 #include "inssel.h"
-#include "cpu-g4.h"
+#include "cpu-ppc.h"
 #include "trace.h"
+#ifdef __APPLE__
+#include <sys/sysctl.h>
+#endif
+
+/* From ir-emit.h */
+/* Hand out the next unused virtual register number for an integer value. */
+static inline guint32
+alloc_ireg (MonoCompile *cfg)
+{
+       guint32 vreg = cfg->next_vreg;
+
+       cfg->next_vreg += 1;
+       return vreg;
+}
+
+/*
+ * Allocate the virtual register(s) for a 64-bit integer value and return the
+ * first one.  With 8-byte pointers a single vreg is taken.  Otherwise three
+ * consecutive vreg numbers are reserved; NEW_VARLOADA below looks up the
+ * variables at dreg + 1 and dreg + 2 for such register-pair values.
+ */
+static inline guint32
+alloc_lreg (MonoCompile *cfg)
+{
+       guint32 first = cfg->next_vreg;
+
+#if SIZEOF_VOID_P == 8
+       cfg->next_vreg += 1;
+#else
+       /* the returned vreg plus the two that follow it */
+       cfg->next_vreg += 3;
+#endif
+       return first;
+}
+
+/*
+ * Allocate a virtual register for a floating point value.
+ */
+static inline guint32
+alloc_freg (MonoCompile *cfg)
+{
+#ifndef MONO_ARCH_SOFT_FLOAT
+       /* Hardware floats draw from the same numbering pool as the int regs */
+       return alloc_ireg (cfg);
+#else
+       /* Soft float: take an lvreg so float ops can be decomposed into long ops */
+       return alloc_lreg (cfg);
+#endif
+}
+
+/*
+ * Allocate a destination vreg suited to a value of the given stack type.
+ * Unknown stack types hit g_assert_not_reached ().
+ */
+static inline guint32
+alloc_dreg (MonoCompile *cfg, MonoStackType stack_type)
+{
+       switch (stack_type) {
+       case STACK_I8:
+               return alloc_lreg (cfg);
+       case STACK_R8:
+               return alloc_freg (cfg);
+       /* every other accepted type lives in a single integer vreg */
+       case STACK_I4:
+       case STACK_PTR:
+       case STACK_MP:
+       case STACK_OBJ:
+       case STACK_VTYPE:
+               return alloc_ireg (cfg);
+       default:
+               g_assert_not_reached ();
+       }
+}
+
+/*
+ * Non-zero for the stack types that NEW_VARLOADA treats as a register pair
+ * when SIZEOF_VOID_P == 4: STACK_I8 always, and STACK_R8 as well when
+ * MONO_ARCH_SOFT_FLOAT is defined.
+ */
+#ifdef MONO_ARCH_SOFT_FLOAT
+#define DECOMPOSE_INTO_REGPAIR(stack_type) ((stack_type) == STACK_I8 || (stack_type) == STACK_R8)
+#else
+#define DECOMPOSE_INTO_REGPAIR(stack_type) ((stack_type) == STACK_I8)
+#endif
+
+/*
+ * Create in dest an OP_LDADDR instruction that takes the address of var.
+ * var is flagged MONO_INST_INDIRECT; dest is typed STACK_MP, inherits var's
+ * klass and receives a freshly allocated dreg.  When SIZEOF_VOID_P == 4 and
+ * var's type is a register pair (see DECOMPOSE_INTO_REGPAIR), the variables
+ * registered for var->dreg + 1 and var->dreg + 2 must exist and are flagged
+ * MONO_INST_INDIRECT too.
+ * The vartype argument is not used.  var and dest are evaluated several
+ * times, and the expansion declares locals named var1 and var2.
+ */
+#define NEW_VARLOADA(cfg,dest,var,vartype) do {        \
+        MONO_INST_NEW ((cfg), (dest), OP_LDADDR); \
+               (dest)->inst_p0 = (var); \
+               (var)->flags |= MONO_INST_INDIRECT;     \
+               (dest)->type = STACK_MP;        \
+               (dest)->klass = (var)->klass;   \
+        (dest)->dreg = alloc_dreg ((cfg), STACK_MP); \
+               if (SIZEOF_VOID_P == 4 && DECOMPOSE_INTO_REGPAIR ((var)->type)) { MonoInst *var1 = get_vreg_to_inst (cfg, (var)->dreg + 1); MonoInst *var2 = get_vreg_to_inst (cfg, (var)->dreg + 2); g_assert (var1); g_assert (var2); var1->flags |= MONO_INST_INDIRECT; var2->flags |= MONO_INST_INDIRECT; } \
+       } while (0)
+
+/* Same as NEW_VARLOADA, then appends dest to the current basic block (cfg->cbb). */
+#define EMIT_NEW_VARLOADA(cfg,dest,var,vartype) do { NEW_VARLOADA ((cfg), (dest), (var), (vartype)); MONO_ADD_INS ((cfg)->cbb, (dest)); } while (0)
+
+#define FORCE_INDIR_CALL 1
+
+/*
+ * Values of tls_mode.  tls_mode starts out as TLS_MODE_DETECT.
+ * emit_tls_access can generate code only for TLS_MODE_LTHREADS,
+ * TLS_MODE_DARWIN_G5 and TLS_MODE_DARWIN_G4; it asserts for any other value.
+ */
+enum {
+       TLS_MODE_DETECT,
+       TLS_MODE_FAILED,
+       TLS_MODE_LTHREADS,
+       TLS_MODE_NPTL,
+       TLS_MODE_DARWIN_G4,
+       TLS_MODE_DARWIN_G5
+};
+
+/* This mutex protects architecture specific caches */
+#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
+#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
+static CRITICAL_SECTION mini_arch_mutex;
 
 int mono_exc_esp_offset = 0;
+/* Selects the code sequence emit_tls_access generates; initially TLS_MODE_DETECT */
+static int tls_mode = TLS_MODE_DETECT;
+/* NOTE(review): presumably thread-local storage keys, with -1 meaning "not assigned yet" - confirm against the code that sets them */
+static int lmf_pthread_key = -1;
+static int monothread_key = -1;
+static int monodomain_key = -1;
+
+/*
+ * Split a pthread key into the two byte offsets used by
+ * emit_linuxthreads_tls.  The return value is 284 plus one pointer-sized
+ * slot per group of 32 keys; *offset2 receives the pointer-sized slot of
+ * the key inside its group.
+ */
+static int
+offsets_from_pthread_key (guint32 key, int *offset2)
+{
+       int outer = key >> 5;   /* key / 32 */
+       int inner = key & 31;   /* key % 32 */
+
+       *offset2 = inner * sizeof (gpointer);
+       return 284 + outer * sizeof (gpointer);
+}
+
+/*
+ * Emit two dependent word loads that leave the value for key in dreg:
+ * first the word at off1 from r2, then the word at off2 from that result.
+ * Both offsets come from offsets_from_pthread_key ().
+ * Expands to exactly one statement; the caller writes the terminating ';'
+ * (a trailing ';' inside the macro would break use in an if/else).
+ */
+#define emit_linuxthreads_tls(code,dreg,key) do {\
+               int off1, off2; \
+               off1 = offsets_from_pthread_key ((key), &off2); \
+               ppc_lwz ((code), (dreg), off1, ppc_r2); \
+               ppc_lwz ((code), (dreg), off2, (dreg)); \
+       } while (0)
+
+/*
+ * Emit code that reads special purpose register 104 into dreg and then
+ * loads into dreg the word at 0x48 + key * sizeof (gpointer) from it.
+ * key is parenthesized so that an expression argument scales correctly.
+ * Expands to exactly one statement; the caller writes the terminating ';'.
+ */
+#define emit_darwing5_tls(code,dreg,key) do {\
+               int off1 = 0x48 + (key) * sizeof (gpointer);    \
+               ppc_mfspr ((code), (dreg), 104);        \
+               ppc_lwz ((code), (dreg), off1, (dreg)); \
+       } while (0)
+
+/*
+ * Emit code that loads r0 with 0x7FF2, executes 'sc', and then loads into
+ * dreg the word at 0x48 + key * sizeof (gpointer) from r3.  When dreg is
+ * not r3, r3 is saved in r11 beforehand and restored afterwards, so the
+ * sequence overwrites r0 and, in that case, r11.
+ * key is parenthesized so that an expression argument scales correctly.
+ * Expands to exactly one statement; the caller writes the terminating ';'.
+ */
+/* FIXME: ensure the sc call preserves all but r3 */
+#define emit_darwing4_tls(code,dreg,key) do {\
+               int off1 = 0x48 + (key) * sizeof (gpointer);    \
+               if ((dreg) != ppc_r3) ppc_mr ((code), ppc_r11, ppc_r3); \
+               ppc_li ((code), ppc_r0, 0x7FF2);        \
+               ppc_sc ((code));        \
+               ppc_lwz ((code), (dreg), off1, ppc_r3); \
+               if ((dreg) != ppc_r3) ppc_mr ((code), ppc_r3, ppc_r11); \
+       } while (0)
+
+/*
+ * Emit the load of the thread-local value for key into dreg, using the
+ * sequence that matches the current tls_mode.  Only TLS_MODE_LTHREADS,
+ * TLS_MODE_DARWIN_G5 and TLS_MODE_DARWIN_G4 are handled; any other mode
+ * reaches g_assert_not_reached ().
+ */
+#define emit_tls_access(code,dreg,key) do {    \
+               switch (tls_mode) {     \
+               case TLS_MODE_LTHREADS: emit_linuxthreads_tls(code,dreg,key); break;    \
+               case TLS_MODE_DARWIN_G5: emit_darwing5_tls(code,dreg,key); break;       \
+               case TLS_MODE_DARWIN_G4: emit_darwing4_tls(code,dreg,key); break;       \
+               default: g_assert_not_reached ();       \
+               }       \
+       } while (0)
 
 const char*
 mono_arch_regname (int reg) {
-       static const char * rnames[] = {
-               "ppc_r0", "ppc_sp", "ppc_r2", "ppc_r3", "ppc_r4",
-               "ppc_r5", "ppc_r6", "ppc_r7", "ppc_r8", "ppc_r9",
-               "ppc_r10", "ppc_r11", "ppc_r12", "ppc_r13", "ppc_r14",
-               "ppc_r15", "ppc_r16", "ppc_r17", "ppc_r18", "ppc_r19",
-               "ppc_r20", "ppc_r21", "ppc_r22", "ppc_r23", "ppc_r24",
-               "ppc_r25", "ppc_r26", "ppc_r27", "ppc_r28", "ppc_r29",
-               "ppc_r30", "ppc_r31"
+       static const char rnames[][4] = {
+               "r0", "sp", "r2", "r3", "r4",
+               "r5", "r6", "r7", "r8", "r9",
+               "r10", "r11", "r12", "r13", "r14",
+               "r15", "r16", "r17", "r18", "r19",
+               "r20", "r21", "r22", "r23", "r24",
+               "r25", "r26", "r27", "r28", "r29",
+               "r30", "r31"
+       };
+       if (reg >= 0 && reg < 32)
+               return rnames [reg];
+       return "unknown";
+}
+
+/* Return the name ("f0" .. "f31") of floating point register reg, or "unknown" when reg is outside 0..31. */
+const char*
+mono_arch_fregname (int reg) {
+       static const char rnames[][4] = {
+               "f0", "f1", "f2", "f3", "f4",
+               "f5", "f6", "f7", "f8", "f9",
+               "f10", "f11", "f12", "f13", "f14",
+               "f15", "f16", "f17", "f18", "f19",
+               "f20", "f21", "f22", "f23", "f24",
+               "f25", "f26", "f27", "f28", "f29",
+               "f30", "f31"
        };
        if (reg >= 0 && reg < 32)
                return rnames [reg];
        return "unknown";
 }
 
-/* this function overwrites r0 */
+/* this function overwrites r0, r11, r12 */
 static guint8*
 emit_memcpy (guint8 *code, int size, int dreg, int doffset, int sreg, int soffset)
 {
        /* unrolled, use the counter in big */
+       if (size > sizeof (gpointer) * 5) {
+               int shifted = size >> 2;
+               guint8 *copy_loop_start, *copy_loop_jump;
+
+               ppc_load (code, ppc_r0, shifted);
+               ppc_mtctr (code, ppc_r0);
+               g_assert (sreg == ppc_r11);
+               ppc_addi (code, ppc_r12, dreg, (doffset - 4));
+               ppc_addi (code, ppc_r11, sreg, (soffset - 4));
+               copy_loop_start = code;
+               ppc_lwzu (code, ppc_r0, ppc_r11, 4);
+               ppc_stwu (code, ppc_r0, 4, ppc_r12);
+               copy_loop_jump = code;
+               ppc_bc (code, PPC_BR_DEC_CTR_NONZERO, 0, 0);
+               ppc_patch (copy_loop_jump, copy_loop_start);
+               size -= shifted * 4;
+               doffset = soffset = 0;
+               dreg = ppc_r12;
+       }
        while (size >= 4) {
                ppc_lwz (code, ppc_r0, soffset, sreg);
                ppc_stw (code, ppc_r0, doffset, dreg);
@@ -100,9 +271,9 @@ mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJit
        for (k = 0; k < param_count; k++) {
                
                if (csig->pinvoke)
-                       size = mono_type_native_stack_size (csig->params [k], &align);
+                       size = mono_type_native_stack_size (csig->params [k], (guint32*)&align);
                else
-                       size = mono_type_stack_size (csig->params [k], &align);
+                       size = mini_type_stack_size (NULL, csig->params [k], &align);
 
                /* ignore alignment for now */
                align = 1;
@@ -124,6 +295,150 @@ mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJit
        return frame_size;
 }
 
+gpointer
+mono_arch_get_vcall_slot (guint8 *code_ptr, gpointer *regs, int *displacement)
+{
+       char *o = NULL;
+       int reg, offset = 0;
+       guint32* code = (guint32*)code_ptr;
+       /* Returns the value of the base register used by the load feeding the 'mtlr'/'blrl' call that ends just before code_ptr, and stores that load's offset in *displacement; NULL (displacement 0) if no such call is recognized */
+       *displacement = 0;
+
+       /* Step back one instruction: a 'blrl' is expected just before code_ptr */
+       --code;
+
+       /* Sanity check: instruction must be 'blrl' */
+       if (*code != 0x4e800021)
+               return NULL;
+
+       /* the thunk-less direct call sequence: lis/ori/mtlr/blrl; recognized by primary opcodes 15/24/31, nothing to look up */
+       if ((code [-1] >> 26) == 31 && (code [-2] >> 26) == 24 && (code [-3] >> 26) == 15) {
+               return NULL;
+       }
+
+       /* OK, we're now at the 'blrl' instruction. Now walk backwards
+       till we get to a 'mtlr rA'; the search stops only when one is found */
+       for (; --code;) {
+               if((*code & 0x7c0803a6) == 0x7c0803a6) {
+                       gint16 soff;
+                       /* Here we are: we reached the 'mtlr rA'.
+                       Extract the register from the instruction */
+                       reg = (*code & 0x03e00000) >> 21; /* overwritten below before it is used */
+                       --code;
+                       /* ok, this is a lwz reg, offset (vtreg); the opcode of this word is not checked
+                        * it is emitted with:
+                        * ppc_emit32 (c, (32 << 26) | ((D) << 21) | ((a) << 16) | (guint16)(d))
+                        */
+                       soff = (*code & 0xffff); /* gint16, so the 16 bit displacement keeps its sign */
+                       offset = soff;
+                       reg = (*code >> 16) & 0x1f; /* the base register field (a) of the load */
+                       g_assert (reg != ppc_r1);
+                       /*g_print ("patching reg is %d\n", reg);*/
+                       if (reg >= 13) {
+                               MonoLMF *lmf = (MonoLMF*)((char*)regs + (14 * sizeof (double)) + (13 * sizeof (gulong)));
+                               /* r13 and up: saved in the MonoLMF structure, taken to start past 14 doubles and 13 gulongs in regs */
+                               o = (gpointer)lmf->iregs [reg - 13];
+                       } else {
+                               o = regs [reg];
+                       }
+                       break;
+               }
+       }
+       *displacement = offset;
+       return o;
+}
+
+gpointer*
+mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
+{
+       int slot_offset;
+       char *base = mono_arch_get_vcall_slot (code, regs, &slot_offset);
+       /* no slot recognized for this call site: hand the NULL on; otherwise the slot is base + offset */
+       if (base == NULL)
+               return NULL;
+       return (gpointer*)(base + slot_offset);
+}
+
+#define MAX_ARCH_DELEGATE_PARAMS 7
+
+gpointer
+mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
+{
+       guint8 *code, *start;
+       /* Returns cached code that jumps to the method_ptr of the delegate passed in r3, or NULL when sig is not supported */
+       /* FIXME: Support more cases */
+       if (MONO_TYPE_ISSTRUCT (sig->ret))
+               return NULL;
+
+       if (has_target) {
+               static guint8* cached = NULL;
+               mono_mini_arch_lock ();
+               if (cached) {
+                       mono_mini_arch_unlock ();
+                       return cached;
+               }
+               /* first call: emit the thunk (four instructions in the 16 reserved bytes) while holding the lock */
+               start = code = mono_global_codeman_reserve (16);
+
+               /* Load method_ptr into ctr, then replace the this argument (r3) with the target */
+               ppc_lwz (code, ppc_r0, G_STRUCT_OFFSET (MonoDelegate, method_ptr), ppc_r3);
+               ppc_mtctr (code, ppc_r0);
+               ppc_lwz (code, ppc_r3, G_STRUCT_OFFSET (MonoDelegate, target), ppc_r3);
+               ppc_bcctr (code, PPC_BR_ALWAYS, 0);
+
+               g_assert ((code - start) <= 16);
+
+               mono_arch_flush_icache (start, 16);
+               cached = start;
+               mono_mini_arch_unlock ();
+               return cached;
+       } else {
+               static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
+               int size, i;
+               /* handled only for at most MAX_ARCH_DELEGATE_PARAMS parameters that all pass mono_is_regsize_var */
+               if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
+                       return NULL;
+               for (i = 0; i < sig->param_count; ++i)
+                       if (!mono_is_regsize_var (sig->params [i]))
+                               return NULL;
+
+               mono_mini_arch_lock ();
+               code = cache [sig->param_count];
+               if (code) {
+                       mono_mini_arch_unlock ();
+                       return code;
+               }
+               /* lwz + mtctr + bcctr take 12 bytes, plus one 4 byte mr per parameter */
+               size = 12 + sig->param_count * 4;
+               start = code = mono_global_codeman_reserve (size);
+
+               ppc_lwz (code, ppc_r0, G_STRUCT_OFFSET (MonoDelegate, method_ptr), ppc_r3);
+               ppc_mtctr (code, ppc_r0);
+               /* slide down the arguments: r4 -> r3, r5 -> r4, ... which overwrites the delegate in r3 */
+               for (i = 0; i < sig->param_count; ++i) {
+                       ppc_mr (code, (ppc_r3 + i), (ppc_r3 + i + 1));
+               }
+               ppc_bcctr (code, PPC_BR_ALWAYS, 0);
+
+               g_assert ((code - start) <= size);
+
+               mono_arch_flush_icache (start, size);
+               cache [sig->param_count] = start;
+               mono_mini_arch_unlock ();
+               return start;
+       }
+       return NULL;
+}
+
+gpointer
+mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, gssize *regs, guint8 *code)
+{
+       /* 'this' is read from the saved r3, or from r4 when the method returns a struct */
+       /* FIXME: handle returning a struct */
+       int this_reg = MONO_TYPE_ISSTRUCT (sig->ret) ? ppc_r4 : ppc_r3;
+       return (gpointer)regs [this_reg];
+}
+
 /*
  * Initialize the cpu to execute managed code.
  */
@@ -132,6 +447,24 @@ mono_arch_cpu_init (void)
 {
 }
 
+/*
+ * Initialize architecture specific code: sets up mini_arch_mutex, the lock taken by mono_mini_arch_lock ().
+ */
+void
+mono_arch_init (void)
+{
+       InitializeCriticalSection (&mini_arch_mutex);   
+}
+
+/*
+ * Cleanup architecture specific code: deletes mini_arch_mutex, which mono_arch_init () set up.
+ */
+void
+mono_arch_cleanup (void)
+{
+       DeleteCriticalSection (&mini_arch_mutex);
+}
+
 /*
  * This function returns the optimizations supported on this cpu.
  */
@@ -141,7 +474,7 @@ mono_arch_cpu_optimizazions (guint32 *exclude_mask)
        guint32 opts = 0;
 
        /* no ppc-specific optimizations yet */
-       *exclude_mask = MONO_OPT_INLINE;
+       *exclude_mask = 0;
        return opts;
 }
 
@@ -156,6 +489,7 @@ is_regsize_var (MonoType *t) {
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_PTR:
+       case MONO_TYPE_FNPTR:
                return TRUE;
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_STRING:
@@ -163,6 +497,10 @@ is_regsize_var (MonoType *t) {
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_ARRAY:
                return TRUE;
+       case MONO_TYPE_GENERICINST:
+               if (!mono_type_generic_inst_is_valuetype (t))
+                       return TRUE;
+               return FALSE;
        case MONO_TYPE_VALUETYPE:
                return FALSE;
        }
@@ -197,9 +535,6 @@ mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
        return vars;
 }
 
-#define USE_EXTRA_TEMPS ((1<<30) | (1<<29))
-//#define USE_EXTRA_TEMPS 0
-
 GList *
 mono_arch_get_global_int_regs (MonoCompile *cfg)
 {
@@ -207,10 +542,8 @@ mono_arch_get_global_int_regs (MonoCompile *cfg)
        int i, top = 32;
        if (cfg->frame_reg != ppc_sp)
                top = 31;
-#if USE_EXTRA_TEMPS
-       top = 29;
-#endif
-       for (i = 13; i < top; ++i)
+       /* ppc_r13 is used by the system on PPC EABI */
+       for (i = 14; i < top; ++i)
                regs = g_list_prepend (regs, GUINT_TO_POINTER (i));
 
        return regs;
@@ -230,37 +563,85 @@ mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
        return 2;
 }
 
-// code from ppc/tramp.c, try to keep in sync
-#define MIN_CACHE_LINE 8
+typedef struct { /* one record as read from /proc/self/auxv by mono_arch_flush_icache */
+       long int type; /* record tag; mono_arch_flush_icache looks for 19 */
+       long int value; /* for tag 19 this is taken as the cache line size */
+} AuxVec;
 
 void
 mono_arch_flush_icache (guint8 *code, gint size)
 {
-       guint i;
-       guint8 *p;
+       guint8 *p, *endp, *start;
+       static int cachelinesize = 0;
+       static int cachelineinc = 16;
 
-       p = code;
-       for (i = 0; i < size; i += MIN_CACHE_LINE, p += MIN_CACHE_LINE) {
-               asm ("dcbst 0,%0;" : : "r"(p) : "memory");
+       if (!cachelinesize) {
+#ifdef __APPLE__
+               int mib [3];
+               size_t len;
+               mib [0] = CTL_HW;
+               mib [1] = HW_CACHELINE;
+               len = sizeof (cachelinesize);
+               if (sysctl(mib, 2, &cachelinesize, (size_t*)&len, NULL, 0) == -1) {
+                       perror ("sysctl");
+                       cachelinesize = 128;
+               } else {
+                       cachelineinc = cachelinesize;
+                       /*g_print ("setting cl size to %d\n", cachelinesize);*/
+               }
+#elif defined(__linux__)
+               /* sadly this will work only with 2.6 kernels... */
+               FILE* f = fopen ("/proc/self/auxv", "rb");
+               if (f) {
+                       AuxVec vec;
+                       while (fread (&vec, sizeof (vec), 1, f) == 1) {
+                               if (vec.type == 19) {
+                                       cachelinesize = vec.value;
+                                       break;
+                               }
+                       }
+                       fclose (f);
+               }
+               if (!cachelinesize)
+                       cachelinesize = 128;
+#else
+#warning Need a way to get cache line size
+               cachelinesize = 128;
+#endif
+       }
+       p = start = code;
+       endp = p + size;
+       start = (guint8*)((guint32)start & ~(cachelinesize - 1));
+       /* use dcbf for smp support, later optimize for UP, see pem._64bit.d20030611.pdf page 211 */
+       if (1) {
+               for (p = start; p < endp; p += cachelineinc) {
+                       asm ("dcbf 0,%0;" : : "r"(p) : "memory");
+               }
+       } else {
+               for (p = start; p < endp; p += cachelineinc) {
+                       asm ("dcbst 0,%0;" : : "r"(p) : "memory");
+               }
        }
        asm ("sync");
        p = code;
-       for (i = 0; i < size; i += MIN_CACHE_LINE, p += MIN_CACHE_LINE) {
+       for (p = start; p < endp; p += cachelineinc) {
                asm ("icbi 0,%0; sync;" : : "r"(p) : "memory");
        }
        asm ("sync");
        asm ("isync");
 }
 
-#define NOT_IMPLEMENTED(x) \
-                g_error ("FIXME: %s is not yet implemented. (trampoline)", x);
+void
+mono_arch_flush_register_windows (void)
+{ /* empty body: this implementation performs no work */
+}
 
 #ifdef __APPLE__
 #define ALWAYS_ON_STACK(s) s
 #define FP_ALSO_IN_REG(s) s
 #else
 #define ALWAYS_ON_STACK(s)
-#define FP_ALSO_IN_REG(s) s
+#define FP_ALSO_IN_REG(s)
 #define ALIGN_DOUBLES
 #endif
 
@@ -274,7 +655,7 @@ enum {
 
 typedef struct {
        gint32  offset;
-       guint16 vtsize; /* in param area */
+       guint32 vtsize; /* in param area */
        guint8  reg;
        guint8  regtype : 4; /* 0 general, 1 basereg, 2 floating point register, see RegType* */
        guint8  size    : 4; /* 1, 2, 4, 8, or regs used by RegTypeStructByVal */
@@ -326,6 +707,28 @@ add_general (guint *gr, guint *stack_size, ArgInfo *ainfo, gboolean simple)
        (*gr) ++;
 }
 
+#if __APPLE__
+/* TRUE iff klass has exactly one non-static field and that field is a by-value R4 or R8 */
+static gboolean
+has_only_a_r48_field (MonoClass *klass)
+{
+       gboolean found = FALSE;
+       gpointer iter = NULL;
+       MonoClassField *field;
+       while ((field = mono_class_get_fields (klass, &iter)) != NULL) {
+               if (field->type->attrs & FIELD_ATTRIBUTE_STATIC)
+                       continue;
+               /* a second instance field, a byref or a non-float type disqualifies */
+               if (found || field->type->byref)
+                       return FALSE;
+               if (field->type->type != MONO_TYPE_R4 && field->type->type != MONO_TYPE_R8)
+                       return FALSE;
+               found = TRUE;
+       }
+       return found;
+}
+#endif
+
 static CallInfo*
 calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
 {
@@ -351,10 +754,11 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
        }
         DEBUG(printf("params: %d\n", sig->param_count));
        for (i = 0; i < sig->param_count; ++i) {
-               if ((sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
+               if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                         /* Prevent implicit arguments and sig_cookie from
                           being passed in registers */
                         gr = PPC_LAST_ARG_REG + 1;
+                       /* FIXME: don't we have to set fr, too? */
                         /* Emit the signature cookie just before the implicit arguments */
                         add_general (&gr, &stack_size, &cinfo->sig_cookie, TRUE);
                 }
@@ -366,7 +770,6 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        continue;
                }
                simpletype = mono_type_get_underlying_type (sig->params [i])->type;
-       enum_calc_size:
                switch (simpletype) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
@@ -401,6 +804,14 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        add_general (&gr, &stack_size, cinfo->args + n, TRUE);
                        n++;
                        break;
+               case MONO_TYPE_GENERICINST:
+                       if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
+                               cinfo->args [n].size = sizeof (gpointer);
+                               add_general (&gr, &stack_size, cinfo->args + n, TRUE);
+                               n++;
+                               break;
+                       }
+                       /* Fall through */
                case MONO_TYPE_VALUETYPE: {
                        gint size;
                        MonoClass *klass;
@@ -409,27 +820,51 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                            size = mono_class_native_size (klass, NULL);
                        else
                            size = mono_class_value_size (klass, NULL);
+#if __APPLE__
+                       if ((size == 4 || size == 8) && has_only_a_r48_field (klass)) {
+                               cinfo->args [n].size = size;
+
+                               /* It was 7, now it is 8 in LinuxPPC */
+                               if (fr <= PPC_LAST_FPARG_REG) {
+                                       cinfo->args [n].regtype = RegTypeFP;
+                                       cinfo->args [n].reg = fr;
+                                       fr ++;
+                                       FP_ALSO_IN_REG (gr ++);
+                                       if (size == 8)
+                                               FP_ALSO_IN_REG (gr ++);
+                                       ALWAYS_ON_STACK (stack_size += size);
+                               } else {
+                                       cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
+                                       cinfo->args [n].regtype = RegTypeBase;
+                                       cinfo->args [n].reg = ppc_sp; /* in the caller*/
+                                       stack_size += 8;
+                               }
+                               n++;
+                               break;
+                       }
+#endif
                        DEBUG(printf ("load %d bytes struct\n",
                                      mono_class_native_size (sig->params [i]->data.klass, NULL)));
 #if PPC_PASS_STRUCTS_BY_VALUE
                        {
                                int align_size = size;
                                int nwords = 0;
+                               int rest = PPC_LAST_ARG_REG - gr + 1;
+                               int n_in_regs;
                                align_size += (sizeof (gpointer) - 1);
                                align_size &= ~(sizeof (gpointer) - 1);
                                nwords = (align_size + sizeof (gpointer) -1 ) / sizeof (gpointer);
+                               n_in_regs = rest >= nwords? nwords: rest;
                                cinfo->args [n].regtype = RegTypeStructByVal;
                                if (gr > PPC_LAST_ARG_REG || (size >= 3 && size % 4 != 0)) {
                                        cinfo->args [n].size = 0;
                                        cinfo->args [n].vtsize = nwords;
                                } else {
-                                       int rest = PPC_LAST_ARG_REG - gr + 1;
-                                       int n_in_regs = rest >= nwords? nwords: rest;
                                        cinfo->args [n].size = n_in_regs;
                                        cinfo->args [n].vtsize = nwords - n_in_regs;
                                        cinfo->args [n].reg = gr;
-                                       gr += n_in_regs;
                                }
+                               gr += n_in_regs;
                                cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
                                /*g_print ("offset for arg %d at %d\n", n, PPC_STACK_PARAM_OFFSET + stack_size);*/
                                stack_size += nwords * sizeof (gpointer);
@@ -437,6 +872,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
 #else
                        add_general (&gr, &stack_size, cinfo->args + n, TRUE);
                        cinfo->args [n].regtype = RegTypeStructByAddr;
+                       cinfo->args [n].vtsize = size;
 #endif
                        n++;
                        break;
@@ -466,6 +902,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
 #else
                        add_general (&gr, &stack_size, cinfo->args + n, TRUE);
                        cinfo->args [n].regtype = RegTypeStructByAddr;
+                       cinfo->args [n].vtsize = size;
 #endif
                        n++;
                        break;
@@ -516,9 +953,16 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                }
        }
 
+       if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
+               /* Prevent implicit arguments and sig_cookie from
+                  being passed in registers */
+               gr = PPC_LAST_ARG_REG + 1;
+               /* Emit the signature cookie just before the implicit arguments */
+               add_general (&gr, &stack_size, &cinfo->sig_cookie, TRUE);
+       }
+
        {
                simpletype = mono_type_get_underlying_type (sig->ret)->type;
-enum_retvalue:
                switch (simpletype) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
@@ -548,6 +992,12 @@ enum_retvalue:
                        cinfo->ret.reg = ppc_f1;
                        cinfo->ret.regtype = RegTypeFP;
                        break;
+               case MONO_TYPE_GENERICINST:
+                       if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
+                               cinfo->ret.reg = ppc_r3;
+                               break;
+                       }
+                       break;
                case MONO_TYPE_VALUETYPE:
                        break;
                case MONO_TYPE_TYPEDBYREF:
@@ -566,6 +1016,34 @@ enum_retvalue:
        return cinfo;
 }
 
+static void
+allocate_tailcall_valuetype_addrs (MonoCompile *cfg)
+{
+#if !PPC_PASS_STRUCTS_BY_VALUE
+       MonoMethodSignature *sig = mono_method_signature (cfg->method);
+       int num_structs = 0;
+       int i;
+       /* Fills cfg->tailcall_valuetype_addrs; compiled out when PPC_PASS_STRUCTS_BY_VALUE is set, and skipped unless MONO_CFG_HAS_TAIL is set */
+       if (!(cfg->flags & MONO_CFG_HAS_TAIL))
+               return;
+       /* Count parameters whose underlying type is MONO_TYPE_VALUETYPE. NOTE(review): MONO_TYPE_GENERICINST value types are not counted - confirm that is intended */
+       for (i = 0; i < sig->param_count; ++i) {
+               MonoType *type = mono_type_get_underlying_type (sig->params [i]);
+               if (type->type == MONO_TYPE_VALUETYPE)
+                       num_structs++;
+       }
+       /* One int_class typed OP_LOCAL variable per counted parameter, each flagged MONO_INST_INDIRECT */
+       if (num_structs) {
+               cfg->tailcall_valuetype_addrs =
+                       mono_mempool_alloc0 (cfg->mempool, sizeof (MonoInst*) * num_structs);
+               for (i = 0; i < num_structs; ++i) {
+                       cfg->tailcall_valuetype_addrs [i] =
+                               mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+                       cfg->tailcall_valuetype_addrs [i]->flags |= MONO_INST_INDIRECT;
+               }
+       }
+#endif
+}
 
 /*
  * Set var information according to the calling convention. ppc version.
@@ -579,7 +1057,12 @@ mono_arch_allocate_vars (MonoCompile *m)
        MonoInst *inst;
        int i, offset, size, align, curinst;
        int frame_reg = ppc_sp;
+       gint32 *offsets;
+       guint32 locals_stack_size, locals_stack_align;
+
+       allocate_tailcall_valuetype_addrs (m);
 
+       m->flags |= MONO_CFG_HAS_SPILLUP;
 
        /* allow room for the vararg method args: void* and long/double */
        if (mono_jit_trace_calls != NULL && mono_trace_eval (m->method))
@@ -589,6 +1072,12 @@ mono_arch_allocate_vars (MonoCompile *m)
         */
        if (m->flags & MONO_CFG_HAS_VARARGS)
                m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
+       /* gtk-sharp and other broken code will dllimport vararg functions even with
+        * non-varargs signatures. Since there is little hope people will get this right
+        * we assume they won't.
+        */
+       if (m->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE)
+               m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
 
        header = mono_method_get_header (m->method);
 
@@ -608,21 +1097,26 @@ mono_arch_allocate_vars (MonoCompile *m)
                m->used_int_regs |= 1 << frame_reg;
        }
 
-       sig = m->method->signature;
+       sig = mono_method_signature (m->method);
        
        offset = 0;
        curinst = 0;
        if (MONO_TYPE_ISSTRUCT (sig->ret)) {
                m->ret->opcode = OP_REGVAR;
-               m->ret->inst_c0 = ppc_r3;
+               m->ret->inst_c0 = m->ret->dreg = ppc_r3;
        } else {
-               /* FIXME: handle long and FP values */
+               /* FIXME: handle long values? */
                switch (mono_type_get_underlying_type (sig->ret)->type) {
                case MONO_TYPE_VOID:
                        break;
+               case MONO_TYPE_R4:
+               case MONO_TYPE_R8:
+                       m->ret->opcode = OP_REGVAR;
+                       m->ret->inst_c0 = m->ret->dreg = ppc_f1;
+                       break;
                default:
                        m->ret->opcode = OP_REGVAR;
-                       m->ret->inst_c0 = ppc_r3;
+                       m->ret->inst_c0 = m->ret->dreg = ppc_r3;
                        break;
                }
        }
@@ -664,42 +1158,52 @@ mono_arch_allocate_vars (MonoCompile *m)
         }
 
        if (MONO_TYPE_ISSTRUCT (sig->ret)) {
-               inst = m->ret;
                offset += sizeof(gpointer) - 1;
                offset &= ~(sizeof(gpointer) - 1);
-               inst->inst_offset = offset;
-               inst->opcode = OP_REGOFFSET;
-               inst->inst_basereg = frame_reg;
+
+               if (m->new_ir) {
+                       m->vret_addr->opcode = OP_REGOFFSET;
+                       m->vret_addr->inst_basereg = frame_reg;
+                       m->vret_addr->inst_offset = offset;
+
+                       if (G_UNLIKELY (m->verbose_level > 1)) {
+                               printf ("vret_addr =");
+                               mono_print_ins (m->vret_addr);
+                       }
+               } else {
+                       inst = m->ret;
+                       inst->inst_offset = offset;
+                       inst->opcode = OP_REGOFFSET;
+                       inst->inst_basereg = frame_reg;
+               }
+
                offset += sizeof(gpointer);
                if (sig->call_convention == MONO_CALL_VARARG)
                        m->sig_cookie += sizeof (gpointer);
        }
 
-       curinst = m->locals_start;
-       for (i = curinst; i < m->num_varinfo; ++i) {
-               inst = m->varinfo [i];
-               if ((inst->flags & MONO_INST_IS_DEAD) || inst->opcode == OP_REGVAR)
-                       continue;
-
-               /* inst->unused indicates native sized value types, this is used by the
-               * pinvoke wrappers when they call functions returning structure */
-               if (inst->unused && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF)
-                       size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), &align);
-               else
-                       size = mono_type_size (inst->inst_vtype, &align);
-
-               offset += align - 1;
-               offset &= ~(align - 1);
-               inst->inst_offset = offset;
-               inst->opcode = OP_REGOFFSET;
-               inst->inst_basereg = frame_reg;
-               offset += size;
-               //g_print ("allocating local %d to %d\n", i, inst->inst_offset);
+       offsets = mono_allocate_stack_slots_full (m, FALSE, &locals_stack_size, &locals_stack_align);
+       if (locals_stack_align) {
+               offset += (locals_stack_align - 1);
+               offset &= ~(locals_stack_align - 1);
+       }
+       for (i = m->locals_start; i < m->num_varinfo; i++) {
+               if (offsets [i] != -1) {
+                       MonoInst *inst = m->varinfo [i];
+                       inst->opcode = OP_REGOFFSET;
+                       inst->inst_basereg = frame_reg;
+                       inst->inst_offset = offset + offsets [i];
+                       /*
+                       g_print ("allocating local %d (%s) to %d\n",
+                               i, mono_type_get_name (inst->inst_vtype), inst->inst_offset);
+                       */
+               }
        }
+       offset += locals_stack_size;
 
        curinst = 0;
        if (sig->hasthis) {
-               inst = m->varinfo [curinst];
+               inst = m->args [curinst];
                if (inst->opcode != OP_REGVAR) {
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = frame_reg;
@@ -714,11 +1218,16 @@ mono_arch_allocate_vars (MonoCompile *m)
        }
 
        for (i = 0; i < sig->param_count; ++i) {
-               inst = m->varinfo [curinst];
+               inst = m->args [curinst];
                if (inst->opcode != OP_REGVAR) {
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = frame_reg;
-                       size = mono_type_size (sig->params [i], &align);
+                       if (sig->pinvoke) {
+                               size = mono_type_native_stack_size (sig->params [i], (guint32*)&align);
+                               inst->backend.is_pinvoke = 1;
+                       } else {
+                               size = mono_type_size (sig->params [i], &align);
+                       }
                        offset += align - 1;
                        offset &= ~(align - 1);
                        inst->inst_offset = offset;
@@ -736,6 +1245,23 @@ mono_arch_allocate_vars (MonoCompile *m)
        /* change sign? */
        m->stack_offset = offset;
 
+       if (m->new_ir && sig->call_convention == MONO_CALL_VARARG) {
+               CallInfo *cinfo = calculate_sizes (m->method->signature, m->method->signature->pinvoke);
+
+               m->sig_cookie = cinfo->sig_cookie.offset;
+
+               g_free(cinfo);
+        }
+}
+
+/*
+ * mono_arch_create_vars:
+ *
+ *   When CFG is compiled with the new IR and its method returns a struct,
+ * create the int-typed OP_ARG variable kept in cfg->vret_addr.  Nothing is
+ * created in any other case.
+ */
+void
+mono_arch_create_vars (MonoCompile *cfg)
+{
+       MonoMethodSignature *sig = mono_method_signature (cfg->method);
+
+       if (!cfg->new_ir)
+               return;
+       if (!MONO_TYPE_ISSTRUCT (sig->ret))
+               return;
+
+       cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
 }
 
 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
@@ -776,7 +1302,7 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
                        MONO_INST_NEW (cfg, arg, OP_OUTARG);
                        arg->inst_imm = cinfo->sig_cookie.offset;
                        arg->inst_left = sig_arg;
-                       
+                       arg->inst_call = call;
                        /* prepend, so they get reversed */
                        arg->next = call->out_args;
                        call->out_args = arg;
@@ -790,35 +1316,51 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
                        in = call->args [i];
                        arg->cil_code = in->cil_code;
                        arg->inst_left = in;
+                       arg->inst_call = call;
                        arg->type = in->type;
-                       /* prepend, we'll need to reverse them later */
+                       /* prepend, so they get reversed */
                        arg->next = call->out_args;
                        call->out_args = arg;
                        if (ainfo->regtype == RegTypeGeneral) {
-                               arg->unused = ainfo->reg;
+                               arg->backend.reg3 = ainfo->reg;
                                call->used_iregs |= 1 << ainfo->reg;
                                if (arg->type == STACK_I8)
                                        call->used_iregs |= 1 << (ainfo->reg + 1);
                        } else if (ainfo->regtype == RegTypeStructByAddr) {
-                               /* FIXME: where si the data allocated? */
-                               arg->unused = ainfo->reg;
-                               call->used_iregs |= 1 << ainfo->reg;
+                               if (ainfo->offset) {
+                                       MonoPPCArgInfo *ai = mono_mempool_alloc0 (cfg->mempool, sizeof (MonoPPCArgInfo));
+                                       arg->opcode = OP_OUTARG_MEMBASE;
+                                       ai->reg = ainfo->reg;
+                                       ai->size = sizeof (gpointer);
+                                       ai->offset = ainfo->offset;
+                                       arg->backend.data = ai;
+                               } else {
+                                       arg->backend.reg3 = ainfo->reg;
+                                       call->used_iregs |= 1 << ainfo->reg;
+                               }
                        } else if (ainfo->regtype == RegTypeStructByVal) {
                                int cur_reg;
+                               MonoPPCArgInfo *ai = mono_mempool_alloc0 (cfg->mempool, sizeof (MonoPPCArgInfo));
                                /* mark the used regs */
                                for (cur_reg = 0; cur_reg < ainfo->size; ++cur_reg) {
                                        call->used_iregs |= 1 << (ainfo->reg + cur_reg);
                                }
                                arg->opcode = OP_OUTARG_VT;
-                               arg->unused = ainfo->reg | (ainfo->size << 8) | (ainfo->vtsize << 16);
-                               arg->inst_imm = ainfo->offset;
+                               ai->reg = ainfo->reg;
+                               ai->size = ainfo->size;
+                               ai->vtsize = ainfo->vtsize;
+                               ai->offset = ainfo->offset;
+                               arg->backend.data = ai;
                        } else if (ainfo->regtype == RegTypeBase) {
-                               arg->opcode = OP_OUTARG;
-                               arg->unused = ainfo->reg | (ainfo->size << 8);
-                               arg->inst_imm = ainfo->offset;
+                               MonoPPCArgInfo *ai = mono_mempool_alloc0 (cfg->mempool, sizeof (MonoPPCArgInfo));
+                               arg->opcode = OP_OUTARG_MEMBASE;
+                               ai->reg = ainfo->reg;
+                               ai->size = ainfo->size;
+                               ai->offset = ainfo->offset;
+                               arg->backend.data = ai;
                        } else if (ainfo->regtype == RegTypeFP) {
                                arg->opcode = OP_OUTARG_R8;
-                               arg->unused = ainfo->reg;
+                               arg->backend.reg3 = ainfo->reg;
                                call->used_fregs |= 1 << ainfo->reg;
                                if (ainfo->size == 4) {
                                        arg->opcode = OP_OUTARG_R8;
@@ -846,6 +1388,7 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
                }
                call->out_args = prev;
        }
+
        call->stack_usage = cinfo->stack_usage;
        cfg->param_area = MAX (cfg->param_area, cinfo->stack_usage);
        cfg->flags |= MONO_CFG_HAS_CALLS;
@@ -858,64 +1401,308 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
        return call;
 }
 
-/*
- * Allow tracing to work with this interface (with an optional argument)
- */
-
-/*
- * This may be needed on some archs or for debugging support.
- */
-void
-mono_arch_instrument_mem_needs (MonoMethod *method, int *stack, int *code)
+/*
+ * emit_sig_cookie:
+ *
+ *   Emit the IR which loads the signature pointer of CALL into a fresh
+ * integer vreg and stores it at cinfo->sig_cookie.offset from ppc_r1.
+ */
+static void
+emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
 {
-       /* no stack room needed now (may be needed for FASTCALL-trace support) */
-       *stack = 0;
-       /* split prolog-epilog requirements? */
-       *code = 50; /* max bytes needed: check this number */
+       int cookie_reg;
+
+       /* materialize the signature pointer as an integer constant */
+       cookie_reg = mono_alloc_ireg (cfg);
+       MONO_EMIT_NEW_ICONST (cfg, cookie_reg, (guint32)call->signature);
+
+       /* ... and write it to its slot relative to ppc_r1 */
+       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ppc_r1, cinfo->sig_cookie.offset, cookie_reg);
 }
 
-void*
-mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
+void
+mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call) /* emit into cfg->cbb the IR which passes the arguments of CALL */
 {
-       guchar *code = p;
+       MonoInst *in, *ins;
+       MonoMethodSignature *sig;
+       int i, n;
+       CallInfo *cinfo;
 
-       ppc_load (code, ppc_r3, cfg->method);
-       ppc_li (code, ppc_r4, 0); /* NULL ebp for now */
-       ppc_load (code, ppc_r0, func);
-       ppc_mtlr (code, ppc_r0);
-       ppc_blrl (code);
-       return code;
-}
+       sig = call->signature;
+       n = sig->param_count + sig->hasthis; /* the implicit this argument is counted too */
+       
+       cinfo = calculate_sizes (sig, sig->pinvoke); /* released with g_free () at the end of the function */
 
-enum {
-       SAVE_NONE,
-       SAVE_STRUCT,
-       SAVE_ONE,
-       SAVE_TWO,
-       SAVE_FP
-};
+       for (i = 0; i < n; ++i) {
+               ArgInfo *ainfo = cinfo->args + i;
+               MonoType *t;
 
-void*
-mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
-{
-       guchar *code = p;
-       int save_mode = SAVE_NONE;
-       MonoMethod *method = cfg->method;
-       int rtype = mono_type_get_underlying_type (method->signature->ret)->type;
-       int save_offset = PPC_STACK_PARAM_OFFSET + cfg->param_area;
-       save_offset += 15;
-       save_offset &= ~15;
-       
-handle_enum:
-       switch (rtype) {
-       case MONO_TYPE_VOID:
-               /* special case string .ctor icall */
-               if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
-                       save_mode = SAVE_ONE;
+               if (i >= sig->hasthis)
+                       t = sig->params [i - sig->hasthis];
                 else
-                       save_mode = SAVE_NONE;
-               break;
-       case MONO_TYPE_I8:
+                       t = &mono_defaults.int_class->byval_arg; /* the this argument is treated as an int-class value */
+               t = mono_type_get_underlying_type (t);
+
+               if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos))
+                       emit_sig_cookie (cfg, call, cinfo); /* the cookie is emitted before the argument at the sentinel position */
+
+               in = call->args [i];
+
+               if (ainfo->regtype == RegTypeGeneral) {
+                       if (!t->byref && ((t->type == MONO_TYPE_I8) || (t->type == MONO_TYPE_U8))) {
+                               MONO_INST_NEW (cfg, ins, OP_MOVE);
+                               ins->dreg = mono_alloc_ireg (cfg);
+                               ins->sreg1 = in->dreg + 1; /* 64 bit value: vreg + 1 goes to ainfo->reg + 1, vreg + 2 to ainfo->reg */
+                               MONO_ADD_INS (cfg->cbb, ins);
+                               mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg + 1, FALSE);
+
+                               MONO_INST_NEW (cfg, ins, OP_MOVE);
+                               ins->dreg = mono_alloc_ireg (cfg);
+                               ins->sreg1 = in->dreg + 2;
+                               MONO_ADD_INS (cfg->cbb, ins);
+                               mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg, FALSE);
+                       } else {
+                               MONO_INST_NEW (cfg, ins, OP_MOVE);
+                               ins->dreg = mono_alloc_ireg (cfg);
+                               ins->sreg1 = in->dreg;
+                               MONO_ADD_INS (cfg->cbb, ins);
+
+                               mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg, FALSE);
+                       }
+               } else if (ainfo->regtype == RegTypeStructByAddr) {
+                       MONO_INST_NEW (cfg, ins, OP_OUTARG_VT);
+                       ins->opcode = OP_OUTARG_VT; /* NOTE(review): looks redundant, MONO_INST_NEW was already given this opcode - confirm */
+                       ins->sreg1 = in->dreg;
+                       ins->klass = in->klass;
+                       ins->inst_p0 = call;
+                       ins->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo)); /* private ArgInfo copy, read back by mono_arch_emit_outarg_vt () */
+                       memcpy (ins->inst_p1, ainfo, sizeof (ArgInfo));
+                       MONO_ADD_INS (cfg->cbb, ins);
+               } else if (ainfo->regtype == RegTypeStructByVal) {
+                       /* this is further handled in mono_arch_emit_outarg_vt () */
+                       MONO_INST_NEW (cfg, ins, OP_OUTARG_VT);
+                       ins->opcode = OP_OUTARG_VT;
+                       ins->sreg1 = in->dreg;
+                       ins->klass = in->klass;
+                       ins->inst_p0 = call;
+                       ins->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
+                       memcpy (ins->inst_p1, ainfo, sizeof (ArgInfo));
+                       MONO_ADD_INS (cfg->cbb, ins);
+               } else if (ainfo->regtype == RegTypeBase) { /* argument stored to memory at ainfo->offset from ppc_r1 */
+                       if (!t->byref && ((t->type == MONO_TYPE_I8) || (t->type == MONO_TYPE_U8))) {
+                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI8_MEMBASE_REG, ppc_r1, ainfo->offset, in->dreg);
+                       } else if (!t->byref && ((t->type == MONO_TYPE_R4) || (t->type == MONO_TYPE_R8))) {
+                               if (t->type == MONO_TYPE_R8)
+                                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, ppc_r1, ainfo->offset, in->dreg);
+                               else
+                                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, ppc_r1, ainfo->offset, in->dreg);
+                       } else {
+                               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ppc_r1, ainfo->offset, in->dreg);
+                       }
+               } else if (ainfo->regtype == RegTypeFP) {
+                       if (t->type == MONO_TYPE_VALUETYPE) {
+                               /* this is further handled in mono_arch_emit_outarg_vt () */
+                               MONO_INST_NEW (cfg, ins, OP_OUTARG_VT);
+                               ins->opcode = OP_OUTARG_VT;
+                               ins->sreg1 = in->dreg;
+                               ins->klass = in->klass;
+                               ins->inst_p0 = call;
+                               ins->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
+                               memcpy (ins->inst_p1, ainfo, sizeof (ArgInfo));
+                               MONO_ADD_INS (cfg->cbb, ins);
+
+                               cfg->flags |= MONO_CFG_HAS_FPOUT;
+                       } else {
+                               int dreg = mono_alloc_freg (cfg);
+
+                               if (ainfo->size == 4) {
+                                       MONO_EMIT_NEW_UNALU (cfg, OP_FCONV_TO_R4, dreg, in->dreg);
+                               } else {
+                                       MONO_INST_NEW (cfg, ins, OP_FMOVE);
+                                       ins->dreg = dreg;
+                                       ins->sreg1 = in->dreg;
+                                       MONO_ADD_INS (cfg->cbb, ins);
+                               }
+
+                               mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg, TRUE);
+                               cfg->flags |= MONO_CFG_HAS_FPOUT;
+                       }
+               } else {
+                       g_assert_not_reached ();
+               }
+       }
+
+       /* Emit the signature cookie in the case that there is no
+          additional argument */
+       if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sig->sentinelpos))
+               emit_sig_cookie (cfg, call, cinfo);
+
+       if (cinfo->struct_ret) {
+               MonoInst *vtarg;
+
+               MONO_INST_NEW (cfg, vtarg, OP_MOVE);
+               vtarg->sreg1 = call->vret_var->dreg;
+               vtarg->dreg = mono_alloc_preg (cfg);
+               MONO_ADD_INS (cfg->cbb, vtarg);
+
+               mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->struct_ret, FALSE); /* cinfo->struct_ret is passed as the target register */
+       }
+
+       call->stack_usage = cinfo->stack_usage;
+       cfg->param_area = MAX (cfg->param_area, cinfo->stack_usage); /* param_area only ever grows to the largest stack_usage seen */
+       cfg->flags |= MONO_CFG_HAS_CALLS;
+
+       g_free (cinfo);
+}
+
+/*
+ * mono_arch_emit_outarg_vt:
+ *
+ *   Expand the OP_OUTARG_VT instruction INS.  ins->inst_p0 holds the call
+ * and ins->inst_p1 the ArgInfo describing how the valuetype addressed by
+ * the dreg of SRC has to be passed: in FP registers, in integer registers
+ * with an optional overflow area relative to ppc_r1, or as the address of
+ * a copy.
+ */
+void
+mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
+{
+       MonoCallInst *call = (MonoCallInst*)ins->inst_p0;
+       ArgInfo *ainfo = ins->inst_p1;
+       int ovf_size = ainfo->vtsize;
+       int doffset = ainfo->offset;
+       int i, soffset, dreg;
+       MonoInst *vtcopy, *load;
+       guint32 copy_size;
+
+       if (ainfo->regtype == RegTypeFP) {
+               /* load the value from memory, then move it into the vreg tied to the FP argument register */
+               int fp_tmp = mono_alloc_freg (cfg);
+
+               if (ainfo->size != 4)
+                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, fp_tmp, src->dreg, 0);
+               else
+                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR4_MEMBASE, fp_tmp, src->dreg, 0);
+               dreg = mono_alloc_freg (cfg);
+               MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, dreg, fp_tmp);
+               mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg, TRUE);
+               return;
+       }
+
+       if (ainfo->regtype == RegTypeStructByVal) {
+               guint32 native_size = 0;
+
+               soffset = 0;
+#ifdef __APPLE__
+               /*
+                * Darwin pinvokes need some special handling for 1
+                * and 2 byte arguments
+                */
+               g_assert (ins->klass);
+               if (call->signature->pinvoke)
+                       native_size = mono_class_native_size (ins->klass, NULL);
+               if (native_size == 1 || native_size == 2) {
+                       int small_tmp = mono_alloc_ireg (cfg);
+
+                       if (native_size == 2)
+                               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI2_MEMBASE, small_tmp, src->dreg, soffset);
+                       else
+                               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI1_MEMBASE, small_tmp, src->dreg, soffset);
+                       dreg = mono_alloc_ireg (cfg);
+                       MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, dreg, small_tmp);
+                       mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg, FALSE);
+               } else
+#endif
+               {
+                       /* one pointer sized load per register used by the argument */
+                       for (i = 0; i < ainfo->size; ++i) {
+                               dreg = mono_alloc_ireg (cfg);
+                               MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, soffset);
+                               mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg + i, FALSE);
+                               soffset += sizeof (gpointer);
+                       }
+               }
+
+               /* the part which did not go into registers is copied relative to ppc_r1 */
+               if (ovf_size != 0)
+                       mini_emit_memcpy2 (cfg, ppc_r1, doffset + soffset, src->dreg, soffset, ovf_size * sizeof (gpointer), 0);
+               return;
+       }
+
+       /* remaining case: copy the valuetype to a new local and pass the address of the copy */
+       vtcopy = mono_compile_create_var (cfg, &src->klass->byval_arg, OP_LOCAL);
+
+       /* FIXME: alignment? */
+       if (!call->signature->pinvoke) {
+               copy_size = mini_type_stack_size (cfg->generic_sharing_context, &src->klass->byval_arg, NULL);
+       } else {
+               copy_size = mono_type_native_stack_size (&src->klass->byval_arg, NULL);
+               vtcopy->backend.is_pinvoke = 1;
+       }
+       if (copy_size > 0)
+               g_assert (ovf_size > 0);
+
+       EMIT_NEW_VARLOADA (cfg, load, vtcopy, vtcopy->inst_vtype);
+       mini_emit_memcpy2 (cfg, load->dreg, 0, src->dreg, 0, copy_size, 0);
+
+       /* a non-zero offset means the address is stored to memory, otherwise it goes into ainfo->reg */
+       if (doffset)
+               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ppc_r1, doffset, load->dreg);
+       else
+               mono_call_inst_add_outarg_reg (cfg, call, load->dreg, ainfo->reg, FALSE);
+}
+
+/*
+ * mono_arch_emit_setret:
+ *
+ *   Emit into cfg->cbb the IR which hands VAL over as the return value of
+ * METHOD.  Non-byref 64 bit integers use OP_SETLRET on the vregs
+ * val->dreg + 1 and val->dreg + 2, non-byref floating point values are
+ * moved with OP_FMOVE, everything else with OP_MOVE into cfg->ret->dreg.
+ */
+void
+mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
+{
+       MonoType *ret = mono_type_get_underlying_type (mono_method_signature (method)->ret);
+       MonoInst *ins;
+
+       if (ret->byref) {
+               MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
+               return;
+       }
+
+       switch (ret->type) {
+       case MONO_TYPE_I8:
+       case MONO_TYPE_U8:
+               MONO_INST_NEW (cfg, ins, OP_SETLRET);
+               ins->sreg1 = val->dreg + 1;
+               ins->sreg2 = val->dreg + 2;
+               MONO_ADD_INS (cfg->cbb, ins);
+               break;
+       case MONO_TYPE_R4:
+       case MONO_TYPE_R8:
+               MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
+               break;
+       default:
+               MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
+               break;
+       }
+}
+
+/*
+ * mono_arch_is_inst_imm:
+ *
+ *   Report IMM as usable as an instruction immediate; the answer is TRUE
+ * for every value.
+ * FIXME: this is just a useless hint: fix the interface to include the opcode
+ */
+gboolean
+mono_arch_is_inst_imm (gint64 imm)
+{
+       /* the value is not looked at */
+       (void) imm;
+
+       return TRUE;
+}
+
+/*
+ * Allow tracing to work with this interface (with an optional argument)
+ */
+
+/*
+ * mono_arch_instrument_prolog:
+ *
+ *   Emit at P a call to FUNC with cfg->method in ppc_r3 and 0 in ppc_r4.
+ * FUNC is loaded into ppc_r0 and called through the link register.
+ * Returns the code pointer as left by the ppc_* emitter macros.
+ * ENABLE_ARGUMENTS is not used.
+ */
+void*
+mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
+{
+       guchar *ip;
+
+       ip = p;
+
+       /* first argument: the method being compiled */
+       ppc_load (ip, ppc_r3, cfg->method);
+       /* second argument: NULL ebp for now */
+       ppc_li (ip, ppc_r4, 0);
+
+       /* call FUNC through the link register */
+       ppc_load (ip, ppc_r0, func);
+       ppc_mtlr (ip, ppc_r0);
+       ppc_blrl (ip);
+
+       return ip;
+}
+
+enum { /* values of save_mode in mono_arch_instrument_epilog (), picked from the return type of the method */
+       SAVE_NONE, /* initial value; also used for void returns */
+       SAVE_STRUCT, /* NOTE(review): presumably struct returns - the use is not visible here, confirm */
+       SAVE_ONE, /* used by the special case for the string .ctor icall */
+       SAVE_TWO, /* used for MONO_TYPE_I8 and MONO_TYPE_U8 returns */
+       SAVE_FP /* NOTE(review): presumably floating point returns - the use is not visible here, confirm */
+};
+
+void*
+mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
+{
+       guchar *code = p;
+       int save_mode = SAVE_NONE;
+       int offset;
+       MonoMethod *method = cfg->method;
+       int rtype = mono_type_get_underlying_type (mono_method_signature (method)->ret)->type;
+       int save_offset = PPC_STACK_PARAM_OFFSET + cfg->param_area;
+       save_offset += 15;
+       save_offset &= ~15;
+       
+       offset = code - cfg->native_code;
+       /* we need about 16 instructions */
+       if (offset > (cfg->code_size - 16 * 4)) {
+               cfg->code_size *= 2;
+               cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+               code = cfg->native_code + offset;
+       }
+
+       switch (rtype) {
+       case MONO_TYPE_VOID:
+               /* special case string .ctor icall */
+               if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
+                       save_mode = SAVE_ONE;
+               else
+                       save_mode = SAVE_NONE;
+               break;
+       case MONO_TYPE_I8:
        case MONO_TYPE_U8:
                save_mode = SAVE_TWO;
                break;
@@ -998,8 +1785,11 @@ handle_enum:
  * going to be perf critical anyway.
  */
 typedef struct {
-       MonoBasicBlock *bb;
-       void *ip;
+       union {
+               MonoBasicBlock *bb; /* branch target, set for MONO_PATCH_INFO_BB_OVF entries */
+               const char *exception; /* exception name, set for MONO_PATCH_INFO_EXC_OVF entries */
+       } data;
+       guint32 ip_offset; /* code - cfg->native_code at the branch for exception entries, 0 for basic block entries */
        guint16 b0_cond;
        guint16 b1_cond;
 } MonoOvfJump;
@@ -1019,8 +1809,8 @@ if (ins->flags & MONO_INST_BRLABEL) { \
                int br_disp = ins->inst_true_bb->max_offset - offset;   \
                if (!ppc_is_imm16 (br_disp + 1024) || ! ppc_is_imm16 (ppc_is_imm16 (br_disp - 1024))) { \
                        MonoOvfJump *ovfj = mono_mempool_alloc (cfg->mempool, sizeof (MonoOvfJump));    \
-                       ovfj->bb = ins->inst_true_bb;   \
-                       ovfj->ip = NULL;        \
+                       ovfj->data.bb = ins->inst_true_bb;      \
+                       ovfj->ip_offset = 0;    \
                        ovfj->b0_cond = (b0);   \
                        ovfj->b1_cond = (b1);   \
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB_OVF, ovfj); \
@@ -1044,31 +1834,33 @@ if (ins->flags & MONO_INST_BRLABEL) { \
                int br_disp = cfg->bb_exit->max_offset - offset;        \
                if (!ppc_is_imm16 (br_disp + 1024) || ! ppc_is_imm16 (ppc_is_imm16 (br_disp - 1024))) { \
                        MonoOvfJump *ovfj = mono_mempool_alloc (cfg->mempool, sizeof (MonoOvfJump));    \
-                       ovfj->bb = NULL;        \
-                       ovfj->ip = code;        \
+                       ovfj->data.exception = (exc_name);      \
+                       ovfj->ip_offset = code - cfg->native_code;      \
                        ovfj->b0_cond = (b0);   \
                        ovfj->b1_cond = (b1);   \
-                       /* FIXME: test this code */     \
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_OVF, ovfj); \
-                       ppc_b (code, 0);        \
+                       ppc_bl (code, 0);       \
                        cfg->bb_exit->max_offset += 24; \
                } else {        \
                        mono_add_patch_info (cfg, code - cfg->native_code,   \
                                    MONO_PATCH_INFO_EXC, exc_name);  \
-                       ppc_bc (code, (b0), (b1), 0);   \
+                       ppc_bcl (code, (b0), (b1), 0);  \
                }       \
        } while (0); 
 
 #define EMIT_COND_SYSTEM_EXCEPTION(cond,exc_name) EMIT_COND_SYSTEM_EXCEPTION_FLAGS(branch_b0_table [(cond)], branch_b1_table [(cond)], (exc_name))
 
-static void
-peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
+void
+mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb) /* empty body: nothing is done for BB in this pass */
 {
-       MonoInst *ins, *last_ins = NULL;
-       ins = bb->code;
+}
 
-       while (ins) {
+void
+mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
+{
+       MonoInst *ins, *n, *last_ins = NULL;
 
+       MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
                switch (ins->opcode) {
                case OP_MUL_IMM: 
                        /* remove unnecessary multiplication with 1 */
@@ -1076,8 +1868,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                                if (ins->dreg != ins->sreg1) {
                                        ins->opcode = OP_MOVE;
                                } else {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
+                                       MONO_DELETE_INS (bb, ins);
                                        continue;
                                }
                        } else {
@@ -1099,8 +1890,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                            ins->inst_basereg == last_ins->inst_destbasereg &&
                            ins->inst_offset == last_ins->inst_offset) {
                                if (ins->dreg == last_ins->sreg1) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
+                                       MONO_DELETE_INS (bb, ins);
                                        continue;
                                } else {
                                        //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
@@ -1123,8 +1913,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                              ins->inst_offset == last_ins->inst_offset) {
 
                                if (ins->dreg == last_ins->dreg) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
+                                       MONO_DELETE_INS (bb, ins);
                                        continue;
                                } else {
                                        ins->opcode = OP_MOVE;
@@ -1157,15 +1946,8 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
-                               if (ins->dreg == last_ins->sreg1) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
-                                       continue;
-                               } else {
-                                       //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
-                                       ins->opcode = OP_MOVE;
-                                       ins->sreg1 = last_ins->sreg1;
-                               }
+                               ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? OP_ICONV_TO_I1 : OP_ICONV_TO_U1;
+                               ins->sreg1 = last_ins->sreg1;                           
                        }
                        break;
                case OP_LOADU2_MEMBASE:
@@ -1173,29 +1955,17 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
-                               if (ins->dreg == last_ins->sreg1) {
-                                       last_ins->next = ins->next;                             
-                                       ins = ins->next;                                
-                                       continue;
-                               } else {
-                                       //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
-                                       ins->opcode = OP_MOVE;
-                                       ins->sreg1 = last_ins->sreg1;
-                               }
+                               ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? OP_ICONV_TO_I2 : OP_ICONV_TO_U2;
+                               ins->sreg1 = last_ins->sreg1;                           
                        }
                        break;
-               case CEE_CONV_I4:
-               case CEE_CONV_U4:
                case OP_MOVE:
-               case OP_SETREG:
                        ins->opcode = OP_MOVE;
                        /* 
                         * OP_MOVE reg, reg 
                         */
                        if (ins->dreg == ins->sreg1) {
-                               if (last_ins)
-                                       last_ins->next = ins->next;                             
-                               ins = ins->next;
+                               MONO_DELETE_INS (bb, ins);
                                continue;
                        }
                        /* 
@@ -1205,8 +1975,7 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (last_ins && last_ins->opcode == OP_MOVE &&
                            ins->sreg1 == last_ins->dreg &&
                            ins->dreg == last_ins->sreg1) {
-                               last_ins->next = ins->next;                             
-                               ins = ins->next;                                
+                               MONO_DELETE_INS (bb, ins);
                                continue;
                        }
                        break;
@@ -1217,6 +1986,58 @@ peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
        bb->last_ins = last_ins;
 }
 
+/*
+ * mono_arch_decompose_opts:
+ *
+ *   Rewrite the integer -> floating point conversion opcodes and
+ * OP_CKFINITE into sequences of simpler instructions emitted through the
+ * MONO_EMIT_NEW_* macros.  Once the replacement sequence has been emitted
+ * the original instruction is turned into an OP_NOP.  Opcodes that are not
+ * listed in the switch are left untouched.
+ *
+ * All three sequences use the 8 bytes at ppc_sp - 8 as scratch memory to
+ * move bits between integer and floating point registers.
+ */
+void
+mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
+{
+       /* This decomposition is only meant to run on the new IR */
+       g_assert (cfg->new_ir);
+
+       switch (ins->opcode) {
+       case OP_ICONV_TO_R_UN: {
+               /*
+                * Unsigned int -> double.  Store 0x43300000 at -8 and the
+                * integer at -4, which read back as one double is
+                * 2^52 + value.  Subtracting adjust_val (the bit pattern
+                * of 2^52) leaves the integer value as a double.
+                */
+               static const guint64 adjust_val = 0x4330000000000000ULL;
+               int msw_reg = mono_alloc_ireg (cfg);
+               int adj_reg = mono_alloc_freg (cfg);
+               int tmp_reg = mono_alloc_freg (cfg);
+               MONO_EMIT_NEW_ICONST (cfg, msw_reg, 0x43300000);
+               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, ppc_sp, -8, msw_reg);
+               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, ppc_sp, -4, ins->sreg1);
+               /* cast drops the const qualifier, same as in the signed case below */
+               MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, (gpointer)&adjust_val);
+               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, ppc_sp, -8);
+               MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg);
+               ins->opcode = OP_NOP;
+               break;
+       }
+       case OP_ICONV_TO_R4:
+       case OP_ICONV_TO_R8: {
+               /*
+                * Signed int -> double.  Same scheme as above, but the
+                * integer is first XORed with 0x80000000 (which adds a
+                * 2^31 bias), so the double read back is
+                * 2^52 + 2^31 + value; adjust_val is the bit pattern of
+                * 2^52 + 2^31.
+                */
+               /* FIXME: change precision for CEE_CONV_R4 */
+               static const guint64 adjust_val = 0x4330000080000000ULL;
+               int msw_reg = mono_alloc_ireg (cfg);
+               int xored = mono_alloc_ireg (cfg);
+               int adj_reg = mono_alloc_freg (cfg);
+               int tmp_reg = mono_alloc_freg (cfg);
+               MONO_EMIT_NEW_ICONST (cfg, msw_reg, 0x43300000);
+               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, ppc_sp, -8, msw_reg);
+               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_XOR_IMM, xored, ins->sreg1, 0x80000000);
+               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, ppc_sp, -4, xored);
+               MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, (gpointer)&adjust_val);
+               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, ppc_sp, -8);
+               MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg);
+               /* must be tested before the opcode is overwritten below */
+               if (ins->opcode == OP_ICONV_TO_R4)
+                       MONO_EMIT_NEW_UNALU (cfg, OP_FCONV_TO_R4, ins->dreg, ins->dreg);
+               ins->opcode = OP_NOP;
+               break;
+       }
+       case OP_CKFINITE: {
+               /*
+                * Store the double at -8 and reload the 32 bit word at that
+                * same offset into an integer register, on which
+                * OP_CHECK_FINITE is emitted; the value itself is then
+                * copied to dreg.
+                * NOTE(review): what OP_CHECK_FINITE does with the word is
+                * implemented elsewhere - confirm there.
+                */
+               int msw_reg = mono_alloc_ireg (cfg);
+               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, ppc_sp, -8, ins->sreg1);
+               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI4_MEMBASE, msw_reg, ppc_sp, -8);
+               MONO_EMIT_NEW_UNALU (cfg, OP_CHECK_FINITE, -1, msw_reg);
+               MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, ins->dreg, ins->sreg1);
+               ins->opcode = OP_NOP;
+               break;
+       }
+       }
+}
+
 /* 
  * the branch_b0_table should maintain the order of these
  * opcodes.
@@ -1261,780 +2082,309 @@ branch_b1_table [] = {
        PPC_BR_LT 
 };
 
-/*
- * returns the offset used by spillvar. It allocates a new
- * spill variable if necessary. 
- */
-static int
-mono_spillvar_offset (MonoCompile *cfg, int spillvar)
-{
-       MonoSpillInfo **si, *info;
-       int i = 0;
-
-       si = &cfg->spill_info; 
-       
-       while (i <= spillvar) {
-
-               if (!*si) {
-                       *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
-                       info->next = NULL;
-                       info->offset = cfg->stack_offset;
-                       cfg->stack_offset += sizeof (gpointer);
-               }
-
-               if (i == spillvar)
-                       return (*si)->offset;
-
-               i++;
-               si = &(*si)->next;
-       }
-
-       g_assert_not_reached ();
-       return 0;
-}
-
-static int
-mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
-{
-       MonoSpillInfo **si, *info;
-       int i = 0;
-
-       si = &cfg->spill_info_float; 
-       
-       while (i <= spillvar) {
-
-               if (!*si) {
-                       *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
-                       info->next = NULL;
-                       cfg->stack_offset += 7;
-                       cfg->stack_offset &= ~7;
-                       info->offset = cfg->stack_offset;
-                       cfg->stack_offset += sizeof (double);
-               }
-
-               if (i == spillvar)
-                       return (*si)->offset;
-
-               i++;
-               si = &(*si)->next;
-       }
-
-       g_assert_not_reached ();
-       return 0;
-}
-
-#undef DEBUG
-#define DEBUG(a) if (cfg->verbose_level > 1) a
-//#define DEBUG(a)
-/* use ppc_r3-ppc_10,ppc_r12 as temp registers, f1-f13 for FP registers */
-#define PPC_CALLER_REGS ((0xff<<3) | (1<<12) | USE_EXTRA_TEMPS)
-#define PPC_CALLER_FREGS (0x3ffe)
-
-#define reg_is_freeable(r) (PPC_CALLER_REGS & 1 << (r))
-#define freg_is_freeable(r) ((r) >= 1 && (r) <= 13)
-
-typedef struct {
-       int born_in;
-       int killed_in;
-       int last_use;
-       int prev_use;
-} RegTrack;
-
-static const char*const * ins_spec = ppcg4;
-
-static void
-print_ins (int i, MonoInst *ins)
-{
-       const char *spec = ins_spec [ins->opcode];
-       g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
-       if (spec [MONO_INST_DEST]) {
-               if (ins->dreg >= MONO_MAX_IREGS)
-                       g_print (" R%d <-", ins->dreg);
-               else
-                       g_print (" %s <-", mono_arch_regname (ins->dreg));
-       }
-       if (spec [MONO_INST_SRC1]) {
-               if (ins->sreg1 >= MONO_MAX_IREGS)
-                       g_print (" R%d", ins->sreg1);
-               else
-                       g_print (" %s", mono_arch_regname (ins->sreg1));
-       }
-       if (spec [MONO_INST_SRC2]) {
-               if (ins->sreg2 >= MONO_MAX_IREGS)
-                       g_print (" R%d", ins->sreg2);
-               else
-                       g_print (" %s", mono_arch_regname (ins->sreg2));
-       }
-       if (spec [MONO_INST_CLOB])
-               g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
-       g_print ("\n");
-}
-
-static void
-print_regtrack (RegTrack *t, int num)
-{
-       int i;
-       char buf [32];
-       const char *r;
-       
-       for (i = 0; i < num; ++i) {
-               if (!t [i].born_in)
-                       continue;
-               if (i >= MONO_MAX_IREGS) {
-                       g_snprintf (buf, sizeof(buf), "R%d", i);
-                       r = buf;
-               } else
-                       r = mono_arch_regname (i);
-               g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
-       }
-}
-
-typedef struct InstList InstList;
-
-struct InstList {
-       InstList *prev;
-       InstList *next;
-       MonoInst *data;
-};
-
-static inline InstList*
-inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
-{
-       InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
-       item->data = data;
-       item->prev = NULL;
-       item->next = list;
-       if (list)
-               list->prev = item;
-       return item;
-}
-
-/*
- * Force the spilling of the variable in the symbolic register 'reg'.
- */
-static int
-get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
-{
-       MonoInst *load;
-       int i, sel, spill;
-       
-       sel = cfg->rs->iassign [reg];
-       /*i = cfg->rs->isymbolic [sel];
-       g_assert (i == reg);*/
-       i = reg;
-       spill = ++cfg->spill_count;
-       cfg->rs->iassign [i] = -spill - 1;
-       mono_regstate_free_int (cfg->rs, sel);
-       /* we need to create a spill var and insert a load to sel after the current instruction */
-       MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
-       load->dreg = sel;
-       load->inst_basereg = cfg->frame_reg;
-       load->inst_offset = mono_spillvar_offset (cfg, spill);
-       if (item->prev) {
-               while (ins->next != item->prev->data)
-                       ins = ins->next;
-       }
-       load->next = ins->next;
-       ins->next = load;
-       DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%sp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
-       i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
-       g_assert (i == sel);
-
-       return sel;
-}
-
-static int
-get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
-{
-       MonoInst *load;
-       int i, sel, spill;
-
-       DEBUG (g_print ("start regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
-       /* exclude the registers in the current instruction */
-       if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
-               if (ins->sreg1 >= MONO_MAX_IREGS)
-                       regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
-               else
-                       regmask &= ~ (1 << ins->sreg1);
-               DEBUG (g_print ("excluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
-       }
-       if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
-               if (ins->sreg2 >= MONO_MAX_IREGS)
-                       regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
-               else
-                       regmask &= ~ (1 << ins->sreg2);
-               DEBUG (g_print ("excluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
-       }
-       if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
-               regmask &= ~ (1 << ins->dreg);
-               DEBUG (g_print ("excluding dreg %s\n", mono_arch_regname (ins->dreg)));
-       }
-
-       DEBUG (g_print ("available regmask: 0x%08x\n", regmask));
-       g_assert (regmask); /* need at least a register we can free */
-       sel = -1;
-       /* we should track prev_use and spill the register that's farther */
-       for (i = 0; i < MONO_MAX_IREGS; ++i) {
-               if (regmask & (1 << i)) {
-                       sel = i;
-                       DEBUG (g_print ("selected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
-                       break;
-               }
-       }
-       i = cfg->rs->isymbolic [sel];
-       spill = ++cfg->spill_count;
-       cfg->rs->iassign [i] = -spill - 1;
-       mono_regstate_free_int (cfg->rs, sel);
-       /* we need to create a spill var and insert a load to sel after the current instruction */
-       MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
-       load->dreg = sel;
-       load->inst_basereg = cfg->frame_reg;
-       load->inst_offset = mono_spillvar_offset (cfg, spill);
-       if (item->prev) {
-               while (ins->next != item->prev->data)
-                       ins = ins->next;
-       }
-       load->next = ins->next;
-       ins->next = load;
-       DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%sp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
-       i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
-       g_assert (i == sel);
-       
-       return sel;
-}
+/*
+ * NEW_INS: create a new instruction with opcode OP in DEST using
+ * MONO_INST_NEW, then pass it to mono_bblock_insert_after_ins () together
+ * with `bb' and `last_ins'.  Those two names are not macro parameters: they
+ * are picked up from the scope in which the macro is expanded.
+ */
+#define NEW_INS(cfg,dest,op) do {                                      \
+               MONO_INST_NEW((cfg), (dest), (op));                     \
+               mono_bblock_insert_after_ins (bb, last_ins, (dest));    \
+       } while (0)
 
+/*
+ * map_to_reg_reg_op:
+ *
+ *   Return the opcode to use in place of OP when its immediate operand is
+ * replaced by a register:
+ *   - the listed *_IMM ALU and compare opcodes map to their two register
+ *     forms,
+ *   - the listed *_MEMBASE loads and *_MEMBASE_REG stores map to the
+ *     corresponding *_MEMINDEX opcodes,
+ *   - the *_MEMBASE_IMM stores map to the matching *_MEMBASE_REG store.
+ * Any opcode not listed in the switch is handed to mono_op_imm_to_op ().
+ */
 static int
-get_float_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
+map_to_reg_reg_op (int op)
 {
-       MonoInst *load;
-       int i, sel, spill;
-
-       DEBUG (g_print ("start regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
-       /* exclude the registers in the current instruction */
-       if (reg != ins->sreg1 && (freg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_FREGS && cfg->rs->fassign [ins->sreg1] >= 0))) {
-               if (ins->sreg1 >= MONO_MAX_FREGS)
-                       regmask &= ~ (1 << cfg->rs->fassign [ins->sreg1]);
-               else
-                       regmask &= ~ (1 << ins->sreg1);
-               DEBUG (g_print ("excluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
+       switch (op) {
+       case OP_ADD_IMM:
+               return OP_IADD;
+       case OP_SUB_IMM:
+               return OP_ISUB;
+       case OP_AND_IMM:
+               return OP_IAND;
+       case OP_COMPARE_IMM:
+               return OP_COMPARE;
+       case OP_ICOMPARE_IMM:
+               return OP_ICOMPARE;
+       case OP_ADDCC_IMM:
+               return OP_IADDCC;
+       case OP_ADC_IMM:
+               return OP_IADC;
+       case OP_SUBCC_IMM:
+               return OP_ISUBCC;
+       case OP_SBB_IMM:
+               return OP_ISBB;
+       case OP_OR_IMM:
+               return OP_IOR;
+       case OP_XOR_IMM:
+               return OP_IXOR;
+       case OP_MUL_IMM:
+               return OP_IMUL;
+       case OP_LOAD_MEMBASE:
+               return OP_LOAD_MEMINDEX;
+       case OP_LOADI4_MEMBASE:
+               return OP_LOADI4_MEMINDEX;
+       case OP_LOADU4_MEMBASE:
+               return OP_LOADU4_MEMINDEX;
+       case OP_LOADU1_MEMBASE:
+               return OP_LOADU1_MEMINDEX;
+       case OP_LOADI2_MEMBASE:
+               return OP_LOADI2_MEMINDEX;
+       case OP_LOADU2_MEMBASE:
+               return OP_LOADU2_MEMINDEX;
+       case OP_LOADI1_MEMBASE:
+               return OP_LOADI1_MEMINDEX;
+       case OP_LOADR4_MEMBASE:
+               return OP_LOADR4_MEMINDEX;
+       case OP_LOADR8_MEMBASE:
+               return OP_LOADR8_MEMINDEX;
+       case OP_STOREI1_MEMBASE_REG:
+               return OP_STOREI1_MEMINDEX;
+       case OP_STOREI2_MEMBASE_REG:
+               return OP_STOREI2_MEMINDEX;
+       case OP_STOREI4_MEMBASE_REG:
+               return OP_STOREI4_MEMINDEX;
+       case OP_STORE_MEMBASE_REG:
+               return OP_STORE_MEMINDEX;
+       case OP_STORER4_MEMBASE_REG:
+               return OP_STORER4_MEMINDEX;
+       case OP_STORER8_MEMBASE_REG:
+               return OP_STORER8_MEMINDEX;
+       case OP_STORE_MEMBASE_IMM:
+               return OP_STORE_MEMBASE_REG;
+       case OP_STOREI1_MEMBASE_IMM:
+               return OP_STOREI1_MEMBASE_REG;
+       case OP_STOREI2_MEMBASE_IMM:
+               return OP_STOREI2_MEMBASE_REG;
+       case OP_STOREI4_MEMBASE_IMM:
+               return OP_STOREI4_MEMBASE_REG;
        }
-       if (reg != ins->sreg2 && (freg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_FREGS && cfg->rs->fassign [ins->sreg2] >= 0))) {
-               if (ins->sreg2 >= MONO_MAX_FREGS)
-                       regmask &= ~ (1 << cfg->rs->fassign [ins->sreg2]);
-               else
-                       regmask &= ~ (1 << ins->sreg2);
-               DEBUG (g_print ("excluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
-       }
-       if (reg != ins->dreg && freg_is_freeable (ins->dreg)) {
-               regmask &= ~ (1 << ins->dreg);
-               DEBUG (g_print ("excluding dreg %s\n", mono_arch_regname (ins->dreg)));
-       }
-
-       DEBUG (g_print ("available regmask: 0x%08x\n", regmask));
-       g_assert (regmask); /* need at least a register we can free */
-       sel = -1;
-       /* we should track prev_use and spill the register that's farther */
-       for (i = 0; i < MONO_MAX_FREGS; ++i) {
-               if (regmask & (1 << i)) {
-                       sel = i;
-                       DEBUG (g_print ("selected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->fassign [sel]));
-                       break;
-               }
-       }
-       i = cfg->rs->fsymbolic [sel];
-       spill = ++cfg->spill_count;
-       cfg->rs->fassign [i] = -spill - 1;
-       mono_regstate_free_float(cfg->rs, sel);
-       /* we need to create a spill var and insert a load to sel after the current instruction */
-       MONO_INST_NEW (cfg, load, OP_LOADR8_MEMBASE);
-       load->dreg = sel;
-       load->inst_basereg = cfg->frame_reg;
-       load->inst_offset = mono_spillvar_offset_float (cfg, spill);
-       if (item->prev) {
-               while (ins->next != item->prev->data)
-                       ins = ins->next;
-       }
-       load->next = ins->next;
-       ins->next = load;
-       DEBUG (g_print ("SPILLED LOAD FP (%d at 0x%08x(%%sp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
-       i = mono_regstate_alloc_float (cfg->rs, 1 << sel);
-       g_assert (i == sel);
-       
-       return sel;
+       return mono_op_imm_to_op (op);
 }
 
-static MonoInst*
-create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
-{
-       MonoInst *copy;
-       MONO_INST_NEW (cfg, copy, OP_MOVE);
-       copy->dreg = dest;
-       copy->sreg1 = src;
-       if (ins) {
-               copy->next = ins->next;
-               ins->next = copy;
-       }
-       DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
-       return copy;
-}
-
-static MonoInst*
-create_copy_ins_float (MonoCompile *cfg, int dest, int src, MonoInst *ins)
-{
-       MonoInst *copy;
-       MONO_INST_NEW (cfg, copy, OP_FMOVE);
-       copy->dreg = dest;
-       copy->sreg1 = src;
-       if (ins) {
-               copy->next = ins->next;
-               ins->next = copy;
-       }
-       DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
-       return copy;
-}
-
-static MonoInst*
-create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
-{
-       MonoInst *store;
-       MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
-       store->sreg1 = reg;
-       store->inst_destbasereg = cfg->frame_reg;
-       store->inst_offset = mono_spillvar_offset (cfg, spill);
-       if (ins) {
-               store->next = ins->next;
-               ins->next = store;
-       }
-       DEBUG (g_print ("SPILLED STORE (%d at 0x%08x(%%sp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
-       return store;
-}
-
-static MonoInst*
-create_spilled_store_float (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
-{
-       MonoInst *store;
-       MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
-       store->sreg1 = reg;
-       store->inst_destbasereg = cfg->frame_reg;
-       store->inst_offset = mono_spillvar_offset_float (cfg, spill);
-       if (ins) {
-               store->next = ins->next;
-               ins->next = store;
-       }
-       DEBUG (g_print ("SPILLED STORE FP (%d at 0x%08x(%%sp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
-       return store;
-}
-
-static void
-insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
-{
-       MonoInst *prev;
-       g_assert (item->next);
-       prev = item->next->data;
-
-       while (prev->next != ins)
-               prev = prev->next;
-       to_insert->next = ins;
-       prev->next = to_insert;
-       /* 
-        * needed otherwise in the next instruction we can add an ins to the 
-        * end and that would get past this instruction.
-        */
-       item->data = to_insert; 
-}
-
-static int
-alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
-{
-       int val = cfg->rs->iassign [sym_reg];
-       if (val < 0) {
-               int spill = 0;
-               if (val < -1) {
-                       /* the register gets spilled after this inst */
-                       spill = -val -1;
-               }
-               val = mono_regstate_alloc_int (cfg->rs, allow_mask);
-               if (val < 0)
-                       val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
-               cfg->rs->iassign [sym_reg] = val;
-               /* add option to store before the instruction for src registers */
-               if (spill)
-                       create_spilled_store (cfg, spill, val, sym_reg, ins);
-       }
-       cfg->rs->isymbolic [val] = sym_reg;
-       return val;
-}
+//#define map_to_reg_reg_op(op) (cfg->new_ir? mono_op_imm_to_op (op): map_to_reg_reg_op (op))
 
+/*
+ * Evaluates to non-zero when OPCODE falls in one of the *_UN branch or
+ * conditional exception opcode ranges tested below, or is one of the four
+ * unsigned set-on-compare opcodes.  OPCODE is evaluated more than once, so
+ * it must not have side effects.
+ */
+#define compare_opcode_is_unsigned(opcode) \
+               ((((opcode) >= CEE_BNE_UN) && ((opcode) <= CEE_BLT_UN)) ||      \
+                (((opcode) >= OP_IBNE_UN) && ((opcode) <= OP_IBLT_UN)) ||      \
+                (((opcode) >= OP_COND_EXC_NE_UN) && ((opcode) <= OP_COND_EXC_LT_UN)) ||        \
+                (((opcode) >= OP_COND_EXC_INE_UN) && ((opcode) <= OP_COND_EXC_ILT_UN)) ||      \
+                ((opcode) == OP_CLT_UN) || ((opcode) == OP_CGT_UN) ||  \
+                ((opcode) == OP_ICLT_UN) || ((opcode) == OP_ICGT_UN))
 /*
- * Local register allocation.
- * We first scan the list of instructions and we save the liveness info of
- * each register (when the register is first used, when it's value is set etc.).
- * We also reverse the list of instructions (in the InstList list) because assigning
- * registers backwards allows for more tricks to be used.
+ * Remove from the instruction list the instructions that can't be
+ * represented with very simple instructions with no register
+ * requirements.
  */
 void
-mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
+mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 {
-       MonoInst *ins;
-       MonoRegState *rs = cfg->rs;
-       int i, val;
-       RegTrack *reginfo, *reginfof;
-       RegTrack *reginfo1, *reginfo2, *reginfod;
-       InstList *tmp, *reversed = NULL;
-       const char *spec;
-       guint32 src1_mask, src2_mask, dest_mask;
-       guint32 cur_iregs, cur_fregs;
-
-       if (!bb->code)
-               return;
-       rs->next_vireg = bb->max_ireg;
-       rs->next_vfreg = bb->max_freg;
-       mono_regstate_assign (rs);
-       reginfo = mono_mempool_alloc0 (cfg->mempool, sizeof (RegTrack) * rs->next_vireg);
-       reginfof = mono_mempool_alloc0 (cfg->mempool, sizeof (RegTrack) * rs->next_vfreg);
-       rs->ifree_mask = PPC_CALLER_REGS;
-       rs->ffree_mask = PPC_CALLER_FREGS;
-
-       ins = bb->code;
-       i = 1;
-       DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
-       /* forward pass on the instructions to collect register liveness info */
-       while (ins) {
-               spec = ins_spec [ins->opcode];
-               DEBUG (print_ins (i, ins));
-               /*if (spec [MONO_INST_CLOB] == 'c') {
-                       MonoCallInst * call = (MonoCallInst*)ins;
-                       int j;
-               }*/
-               if (spec [MONO_INST_SRC1]) {
-                       if (spec [MONO_INST_SRC1] == 'f')
-                               reginfo1 = reginfof;
-                       else
-                               reginfo1 = reginfo;
-                       reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
-                       reginfo1 [ins->sreg1].last_use = i;
-               } else {
-                       ins->sreg1 = -1;
-               }
-               if (spec [MONO_INST_SRC2]) {
-                       if (spec [MONO_INST_SRC2] == 'f')
-                               reginfo2 = reginfof;
-                       else
-                               reginfo2 = reginfo;
-                       reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
-                       reginfo2 [ins->sreg2].last_use = i;
-               } else {
-                       ins->sreg2 = -1;
-               }
-               if (spec [MONO_INST_DEST]) {
-                       if (spec [MONO_INST_DEST] == 'f')
-                               reginfod = reginfof;
-                       else
-                               reginfod = reginfo;
-                       if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
-                               reginfod [ins->dreg].killed_in = i;
-                       reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
-                       reginfod [ins->dreg].last_use = i;
-                       if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
-                               reginfod [ins->dreg].born_in = i;
-                       if (spec [MONO_INST_DEST] == 'l') {
-                               /* result in eax:edx, the virtual register is allocated sequentially */
-                               reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
-                               reginfod [ins->dreg + 1].last_use = i;
-                               if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
-                                       reginfod [ins->dreg + 1].born_in = i;
-                       }
-               } else {
-                       ins->dreg = -1;
-               }
-               reversed = inst_list_prepend (cfg->mempool, reversed, ins);
-               ++i;
-               ins = ins->next;
-       }
+       MonoInst *ins, *next, *temp, *last_ins = NULL;
+       int imm;
+
+       /* setup the virtual reg allocator */
+       if (bb->max_vreg > cfg->rs->next_vreg)
+               cfg->rs->next_vreg = bb->max_vreg;
 
-       cur_iregs = PPC_CALLER_REGS;
-       cur_fregs = PPC_CALLER_FREGS;
-
-       DEBUG (print_regtrack (reginfo, rs->next_vireg));
-       DEBUG (print_regtrack (reginfof, rs->next_vfreg));
-       tmp = reversed;
-       while (tmp) {
-               int prev_dreg, prev_sreg1, prev_sreg2;
-               --i;
-               ins = tmp->data;
-               spec = ins_spec [ins->opcode];
-               DEBUG (g_print ("processing:"));
-               DEBUG (print_ins (i, ins));
-               /* make the register available for allocation: FIXME add fp reg */
-               if (ins->opcode == OP_SETREG || ins->opcode == OP_SETREGIMM) {
-                       cur_iregs |= 1 << ins->dreg;
-                       DEBUG (g_print ("adding %d to cur_iregs\n", ins->dreg));
-               } else if (ins->opcode == OP_SETFREG) {
-                       cur_fregs |= 1 << ins->dreg;
-                       DEBUG (g_print ("adding %d to cur_fregs\n", ins->dreg));
-               } else if (spec [MONO_INST_CLOB] == 'c') {
-                       MonoCallInst *cinst = (MonoCallInst*)ins;
-                       DEBUG (g_print ("excluding regs 0x%x from cur_iregs (0x%x)\n", cinst->used_iregs, cur_iregs));
-                       DEBUG (g_print ("excluding fpregs 0x%x from cur_fregs (0x%x)\n", cinst->used_fregs, cur_fregs));
-                       cur_iregs &= ~cinst->used_iregs;
-                       cur_fregs &= ~cinst->used_fregs;
-                       DEBUG (g_print ("available cur_iregs: 0x%x\n", cur_iregs));
-                       DEBUG (g_print ("available cur_fregs: 0x%x\n", cur_fregs));
-                       /* registers used by the calling convention are excluded from 
-                        * allocation: they will be selectively enabled when they are 
-                        * assigned by the special SETREG opcodes.
+       MONO_BB_FOR_EACH_INS (bb, ins) {
+loop_start:
+               switch (ins->opcode) {
+               case OP_IDIV_UN_IMM:
+               case OP_IDIV_IMM:
+               case OP_IREM_IMM:
+               case OP_IREM_UN_IMM:
+                       NEW_INS (cfg, temp, OP_ICONST);
+                       temp->inst_c0 = ins->inst_imm;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       ins->sreg2 = temp->dreg;
+                       if (ins->opcode == OP_IDIV_IMM)
+                               ins->opcode = OP_IDIV;
+                       else if (ins->opcode == OP_IREM_IMM)
+                               ins->opcode = OP_IREM;
+                       else if (ins->opcode == OP_IDIV_UN_IMM)
+                               ins->opcode = OP_IDIV_UN;
+                       else if (ins->opcode == OP_IREM_UN_IMM)
+                               ins->opcode = OP_IREM_UN;
+                       last_ins = temp;
+                       /* handle rem separately */
+                       goto loop_start;
+               case OP_IREM:
+               case OP_IREM_UN: {
+                       MonoInst *mul;
+                       /* we change a rem dest, src1, src2 to
+                        * div temp1, src1, src2
+                        * mul temp2, temp1, src2
+                        * sub dest, src1, temp2
                         */
+                       NEW_INS (cfg, mul, OP_IMUL);
+                       NEW_INS (cfg, temp, ins->opcode == OP_IREM? OP_IDIV: OP_IDIV_UN);
+                       temp->sreg1 = ins->sreg1;
+                       temp->sreg2 = ins->sreg2;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       mul->sreg1 = temp->dreg;
+                       mul->sreg2 = ins->sreg2;
+                       mul->dreg = mono_regstate_next_int (cfg->rs);
+                       ins->opcode = OP_ISUB;
+                       ins->sreg2 = mul->dreg;
+                       break;
                }
-               dest_mask = src1_mask = src2_mask = cur_iregs;
-               /* update for use with FP regs... */
-               if (spec [MONO_INST_DEST] == 'f') {
-                       dest_mask = cur_fregs;
-                       if (ins->dreg >= MONO_MAX_FREGS) {
-                               val = rs->fassign [ins->dreg];
-                               prev_dreg = ins->dreg;
-                               if (val < 0) {
-                                       int spill = 0;
-                                       if (val < -1) {
-                                               /* the register gets spilled after this inst */
-                                               spill = -val -1;
-                                       }
-                                       val = mono_regstate_alloc_float (rs, dest_mask);
-                                       if (val < 0)
-                                               val = get_float_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
-                                       rs->fassign [ins->dreg] = val;
-                                       if (spill)
-                                               create_spilled_store_float (cfg, spill, val, prev_dreg, ins);
-                               }
-                               DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
-                               rs->fsymbolic [val] = prev_dreg;
-                               ins->dreg = val;
-                               if (spec [MONO_INST_CLOB] == 'c' && ins->dreg != ppc_f1) {
-                                       /* this instruction only outputs to ppc_f1, need to copy */
-                                       create_copy_ins_float (cfg, ins->dreg, ppc_f1, ins);
-                               }
-                       } else {
-                               prev_dreg = -1;
-                       }
-                       if (freg_is_freeable (ins->dreg) && prev_dreg >= 0 && (reginfof [prev_dreg].born_in >= i || !(cur_fregs & (1 << ins->dreg)))) {
-                               DEBUG (g_print ("\tfreeable float %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfof [prev_dreg].born_in));
-                               mono_regstate_free_float (rs, ins->dreg);
-                       }
-               } else if (ins->dreg >= MONO_MAX_IREGS) {
-                       val = rs->iassign [ins->dreg];
-                       prev_dreg = ins->dreg;
-                       if (val < 0) {
-                               int spill = 0;
-                               if (val < -1) {
-                                       /* the register gets spilled after this inst */
-                                       spill = -val -1;
-                               }
-                               val = mono_regstate_alloc_int (rs, dest_mask);
-                               if (val < 0)
-                                       val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
-                               rs->iassign [ins->dreg] = val;
-                               if (spill)
-                                       create_spilled_store (cfg, spill, val, prev_dreg, ins);
-                       }
-                       DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
-                       rs->isymbolic [val] = prev_dreg;
-                       ins->dreg = val;
-                       if (spec [MONO_INST_DEST] == 'l') {
-                               int hreg = prev_dreg + 1;
-                               val = rs->iassign [hreg];
-                               if (val < 0) {
-                                       int spill = 0;
-                                       if (val < -1) {
-                                               /* the register gets spilled after this inst */
-                                               spill = -val -1;
-                                       }
-                                       val = mono_regstate_alloc_int (rs, dest_mask);
-                                       if (val < 0)
-                                               val = get_register_spilling (cfg, tmp, ins, dest_mask, hreg);
-                                       rs->iassign [hreg] = val;
-                                       if (spill)
-                                               create_spilled_store (cfg, spill, val, hreg, ins);
-                               }
-                               DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
-                               rs->isymbolic [val] = hreg;
-                               /* FIXME:? ins->dreg = val; */
-                               if (ins->dreg == ppc_r4) {
-                                       if (val != ppc_r3)
-                                               create_copy_ins (cfg, val, ppc_r3, ins);
-                               } else if (ins->dreg == ppc_r3) {
-                                       if (val == ppc_r4) {
-                                               /* swap */
-                                               create_copy_ins (cfg, ppc_r4, ppc_r0, ins);
-                                               create_copy_ins (cfg, ppc_r3, ppc_r4, ins);
-                                               create_copy_ins (cfg, ppc_r0, ppc_r3, ins);
-                                       } else {
-                                               /* two forced copies */
-                                               create_copy_ins (cfg, ins->dreg, ppc_r4, ins);
-                                               create_copy_ins (cfg, val, ppc_r3, ins);
-                                       }
-                               } else {
-                                       if (val == ppc_r3) {
-                                               create_copy_ins (cfg, ins->dreg, ppc_r4, ins);
-                                       } else {
-                                               /* two forced copies */
-                                               create_copy_ins (cfg, val, ppc_r3, ins);
-                                               create_copy_ins (cfg, ins->dreg, ppc_r4, ins);
-                                       }
-                               }
-                               if (reg_is_freeable (val) && hreg >= 0 && (reginfo [hreg].born_in >= i && !(cur_iregs & (1 << val)))) {
-                                       DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
-                                       mono_regstate_free_int (rs, val);
-                               }
-                       } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != ppc_r3 && spec [MONO_INST_CLOB] != 'd') {
-                               /* this instruction only outputs to ppc_r3, need to copy */
-                               create_copy_ins (cfg, ins->dreg, ppc_r3, ins);
+               case OP_IADD_IMM:
+               case OP_ADD_IMM:
+               case OP_ADDCC_IMM:
+                       if (!ppc_is_imm16 (ins->inst_imm)) {
+                               NEW_INS (cfg,  temp, OP_ICONST);
+                               temp->inst_c0 = ins->inst_imm;
+                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                               ins->sreg2 = temp->dreg;
+                               ins->opcode = map_to_reg_reg_op (ins->opcode);
                        }
-               } else {
-                       prev_dreg = -1;
-               }
-               if (spec [MONO_INST_DEST] == 'f' && freg_is_freeable (ins->dreg) && prev_dreg >= 0 && (reginfof [prev_dreg].born_in >= i)) {
-                       DEBUG (g_print ("\tfreeable float %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfof [prev_dreg].born_in));
-                       mono_regstate_free_float (rs, ins->dreg);
-               } else if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && (reginfo [prev_dreg].born_in >= i)) {
-                       DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
-                       mono_regstate_free_int (rs, ins->dreg);
-               }
-               if (spec [MONO_INST_SRC1] == 'f') {
-                       src1_mask = cur_fregs;
-                       if (ins->sreg1 >= MONO_MAX_FREGS) {
-                               val = rs->fassign [ins->sreg1];
-                               prev_sreg1 = ins->sreg1;
-                               if (val < 0) {
-                                       int spill = 0;
-                                       if (val < -1) {
-                                               /* the register gets spilled after this inst */
-                                               spill = -val -1;
-                                       }
-                                       //g_assert (val == -1); /* source cannot be spilled */
-                                       val = mono_regstate_alloc_float (rs, src1_mask);
-                                       if (val < 0)
-                                               val = get_float_register_spilling (cfg, tmp, ins, src1_mask, ins->sreg1);
-                                       rs->fassign [ins->sreg1] = val;
-                                       DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
-                                       if (spill) {
-                                               MonoInst *store = create_spilled_store_float (cfg, spill, val, prev_sreg1, NULL);
-                                               insert_before_ins (ins, tmp, store);
-                                       }
-                               }
-                               rs->fsymbolic [val] = prev_sreg1;
-                               ins->sreg1 = val;
-                       } else {
-                               prev_sreg1 = -1;
-                       }
-               } else if (ins->sreg1 >= MONO_MAX_IREGS) {
-                       val = rs->iassign [ins->sreg1];
-                       prev_sreg1 = ins->sreg1;
-                       if (val < 0) {
-                               int spill = 0;
-                               if (val < -1) {
-                                       /* the register gets spilled after this inst */
-                                       spill = -val -1;
-                               }
-                               if (0 && ins->opcode == OP_MOVE) {
-                                       /* 
-                                        * small optimization: the dest register is already allocated
-                                        * but the src one is not: we can simply assign the same register
-                                        * here and peephole will get rid of the instruction later.
-                                        * This optimization may interfere with the clobbering handling:
-                                        * it removes a mov operation that will be added again to handle clobbering.
-                                        * There are also some other issues that should with make testjit.
-                                        */
-                                       mono_regstate_alloc_int (rs, 1 << ins->dreg);
-                                       val = rs->iassign [ins->sreg1] = ins->dreg;
-                                       //g_assert (val >= 0);
-                                       DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
-                               } else {
-                                       //g_assert (val == -1); /* source cannot be spilled */
-                                       val = mono_regstate_alloc_int (rs, src1_mask);
-                                       if (val < 0)
-                                               val = get_register_spilling (cfg, tmp, ins, src1_mask, ins->sreg1);
-                                       rs->iassign [ins->sreg1] = val;
-                                       DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
-                               }
-                               if (spill) {
-                                       MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
-                                       insert_before_ins (ins, tmp, store);
-                               }
+                       break;
+               case OP_ISUB_IMM:
+               case OP_SUB_IMM:
+                       if (!ppc_is_imm16 (-ins->inst_imm)) {
+                               NEW_INS (cfg, temp, OP_ICONST);
+                               temp->inst_c0 = ins->inst_imm;
+                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                               ins->sreg2 = temp->dreg;
+                               ins->opcode = map_to_reg_reg_op (ins->opcode);
                        }
-                       rs->isymbolic [val] = prev_sreg1;
-                       ins->sreg1 = val;
-               } else {
-                       prev_sreg1 = -1;
-               }
-               if (spec [MONO_INST_SRC2] == 'f') {
-                       src2_mask = cur_fregs;
-                       if (ins->sreg2 >= MONO_MAX_FREGS) {
-                               val = rs->fassign [ins->sreg2];
-                               prev_sreg2 = ins->sreg2;
-                               if (val < 0) {
-                                       int spill = 0;
-                                       if (val < -1) {
-                                               /* the register gets spilled after this inst */
-                                               spill = -val -1;
-                                       }
-                                       val = mono_regstate_alloc_float (rs, src2_mask);
-                                       if (val < 0)
-                                               val = get_float_register_spilling (cfg, tmp, ins, src2_mask, ins->sreg2);
-                                       rs->fassign [ins->sreg2] = val;
-                                       DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
-                                       if (spill)
-                                               create_spilled_store_float (cfg, spill, val, prev_sreg2, ins);
+                       break;
+               case OP_IAND_IMM:
+               case OP_IOR_IMM:
+               case OP_IXOR_IMM:
+               case OP_AND_IMM:
+               case OP_OR_IMM:
+               case OP_XOR_IMM:
+                       if ((ins->inst_imm & 0xffff0000) && (ins->inst_imm & 0xffff)) {
+                               NEW_INS (cfg, temp, OP_ICONST);
+                               temp->inst_c0 = ins->inst_imm;
+                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                               ins->sreg2 = temp->dreg;
+                               ins->opcode = map_to_reg_reg_op (ins->opcode);
+                       }
+                       break;
+               case OP_ISBB_IMM:
+               case OP_IADC_IMM:
+               case OP_SBB_IMM:
+               case OP_SUBCC_IMM:
+               case OP_ADC_IMM:
+                       NEW_INS (cfg, temp, OP_ICONST);
+                       temp->inst_c0 = ins->inst_imm;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       ins->sreg2 = temp->dreg;
+                       ins->opcode = map_to_reg_reg_op (ins->opcode);
+                       break;
+               case OP_COMPARE_IMM:
+               case OP_ICOMPARE_IMM:
+                       next = ins->next;
+                       /* Branch opts can eliminate the branch */
+                       if (!next || (!(MONO_IS_COND_BRANCH_OP (next) || MONO_IS_COND_EXC (next) || MONO_IS_SETCC (next)))) {
+                               ins->opcode = OP_NOP;
+                               break;
+                       }
+                       g_assert(next);
+                       if (compare_opcode_is_unsigned (next->opcode)) {
+                               if (!ppc_is_uimm16 (ins->inst_imm)) {
+                                       NEW_INS (cfg, temp, OP_ICONST);
+                                       temp->inst_c0 = ins->inst_imm;
+                                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                                       ins->sreg2 = temp->dreg;
+                                       ins->opcode = map_to_reg_reg_op (ins->opcode);
                                }
-                               rs->fsymbolic [val] = prev_sreg2;
-                               ins->sreg2 = val;
                        } else {
-                               prev_sreg2 = -1;
-                       }
-               } else if (ins->sreg2 >= MONO_MAX_IREGS) {
-                       val = rs->iassign [ins->sreg2];
-                       prev_sreg2 = ins->sreg2;
-                       if (val < 0) {
-                               int spill = 0;
-                               if (val < -1) {
-                                       /* the register gets spilled after this inst */
-                                       spill = -val -1;
-                               }
-                               val = mono_regstate_alloc_int (rs, src2_mask);
-                               if (val < 0)
-                                       val = get_register_spilling (cfg, tmp, ins, src2_mask, ins->sreg2);
-                               rs->iassign [ins->sreg2] = val;
-                               DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
-                               if (spill)
-                                       create_spilled_store (cfg, spill, val, prev_sreg2, ins);
-                       }
-                       rs->isymbolic [val] = prev_sreg2;
-                       ins->sreg2 = val;
-               } else {
-                       prev_sreg2 = -1;
-               }
-
-               if (spec [MONO_INST_CLOB] == 'c') {
-                       int j, s;
-                       guint32 clob_mask = PPC_CALLER_REGS;
-                       for (j = 0; j < MONO_MAX_IREGS; ++j) {
-                               s = 1 << j;
-                               if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
-                                       //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
+                               if (!ppc_is_imm16 (ins->inst_imm)) {
+                                       NEW_INS (cfg, temp, OP_ICONST);
+                                       temp->inst_c0 = ins->inst_imm;
+                                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                                       ins->sreg2 = temp->dreg;
+                                       ins->opcode = map_to_reg_reg_op (ins->opcode);
                                }
                        }
+                       break;
+               case OP_IMUL_IMM:
+               case OP_MUL_IMM:
+                       if (ins->inst_imm == 1) {
+                               ins->opcode = OP_MOVE;
+                               break;
+                       }
+                       if (ins->inst_imm == 0) {
+                               ins->opcode = OP_ICONST;
+                               ins->inst_c0 = 0;
+                               break;
+                       }
+                       imm = mono_is_power_of_two (ins->inst_imm);
+                       if (imm > 0) {
+                               ins->opcode = OP_SHL_IMM;
+                               ins->inst_imm = imm;
+                               break;
+                       }
+                       if (!ppc_is_imm16 (ins->inst_imm)) {
+                               NEW_INS (cfg, temp, OP_ICONST);
+                               temp->inst_c0 = ins->inst_imm;
+                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                               ins->sreg2 = temp->dreg;
+                               ins->opcode = map_to_reg_reg_op (ins->opcode);
+                       }
+                       break;
+               case OP_LOCALLOC_IMM:
+                       NEW_INS (cfg, temp, OP_ICONST);
+                       temp->inst_c0 = ins->inst_imm;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       ins->sreg1 = temp->dreg;
+                       ins->opcode = OP_LOCALLOC;
+                       break;
+               case OP_LOAD_MEMBASE:
+               case OP_LOADI4_MEMBASE:
+               case OP_LOADU4_MEMBASE:
+               case OP_LOADI2_MEMBASE:
+               case OP_LOADU2_MEMBASE:
+               case OP_LOADI1_MEMBASE:
+               case OP_LOADU1_MEMBASE:
+               case OP_LOADR4_MEMBASE:
+               case OP_LOADR8_MEMBASE:
+               case OP_STORE_MEMBASE_REG:
+               case OP_STOREI4_MEMBASE_REG:
+               case OP_STOREI2_MEMBASE_REG:
+               case OP_STOREI1_MEMBASE_REG:
+               case OP_STORER4_MEMBASE_REG:
+               case OP_STORER8_MEMBASE_REG:
+                       /* we can do two things: load the immed in a register
+                        * and use an indexed load, or see if the immed can be
+                        * represented as an ad_imm + a load with a smaller offset
+                        * that fits. We just do the first for now, optimize later.
+                        */
+                       if (ppc_is_imm16 (ins->inst_offset))
+                               break;
+                       NEW_INS (cfg, temp, OP_ICONST);
+                       temp->inst_c0 = ins->inst_offset;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       ins->sreg2 = temp->dreg;
+                       ins->opcode = map_to_reg_reg_op (ins->opcode);
+                       break;
+               case OP_STORE_MEMBASE_IMM:
+               case OP_STOREI1_MEMBASE_IMM:
+               case OP_STOREI2_MEMBASE_IMM:
+               case OP_STOREI4_MEMBASE_IMM:
+                       NEW_INS (cfg, temp, OP_ICONST);
+                       temp->inst_c0 = ins->inst_imm;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       ins->sreg1 = temp->dreg;
+                       ins->opcode = map_to_reg_reg_op (ins->opcode);
+                       last_ins = temp;
+                       goto loop_start; /* make it handle the possibly big ins->inst_offset */
+               case OP_R8CONST:
+               case OP_R4CONST:
+                       NEW_INS (cfg, temp, OP_ICONST);
+                       temp->inst_c0 = (guint32)ins->inst_p0;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       ins->inst_basereg = temp->dreg;
+                       ins->inst_offset = 0;
+                       ins->opcode = ins->opcode == OP_R4CONST? OP_LOADR4_MEMBASE: OP_LOADR8_MEMBASE;
+                       last_ins = temp;
+                       /* make it handle the possibly big ins->inst_offset
+                        * later optimize to use lis + load_membase
+                        */
+                       goto loop_start;
                }
-               /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
-                       DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
-                       mono_regstate_free_int (rs, ins->sreg1);
-               }
-               if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
-                       DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
-                       mono_regstate_free_int (rs, ins->sreg2);
-               }*/
-               
-               //DEBUG (print_ins (i, ins));
-               tmp = tmp->next;
+               last_ins = ins;
        }
-       cfg->max_ireg = MAX (cfg->max_ireg, rs->max_ireg);
+       bb->last_ins = last_ins;
+       bb->max_vreg = cfg->rs->next_vreg;
+       
 }
 
 static guchar*
@@ -2058,51 +2408,9 @@ emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size,
        return code;
 }
 
-static unsigned char*
-mono_emit_stack_alloc (guchar *code, MonoInst* tree)
-{
-#if 0
-       int sreg = tree->sreg1;
-       x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
-       if (tree->flags & MONO_INST_INIT) {
-               int offset = 0;
-               if (tree->dreg != X86_EAX && sreg != X86_EAX) {
-                       x86_push_reg (code, X86_EAX);
-                       offset += 4;
-               }
-               if (tree->dreg != X86_ECX && sreg != X86_ECX) {
-                       x86_push_reg (code, X86_ECX);
-                       offset += 4;
-               }
-               if (tree->dreg != X86_EDI && sreg != X86_EDI) {
-                       x86_push_reg (code, X86_EDI);
-                       offset += 4;
-               }
-               
-               x86_shift_reg_imm (code, X86_SHR, sreg, 2);
-               if (sreg != X86_ECX)
-                       x86_mov_reg_reg (code, X86_ECX, sreg, 4);
-               x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
-                               
-               x86_lea_membase (code, X86_EDI, X86_ESP, offset);
-               x86_cld (code);
-               x86_prefix (code, X86_REP_PREFIX);
-               x86_stosl (code);
-               
-               if (tree->dreg != X86_EDI && sreg != X86_EDI)
-                       x86_pop_reg (code, X86_EDI);
-               if (tree->dreg != X86_ECX && sreg != X86_ECX)
-                       x86_pop_reg (code, X86_ECX);
-               if (tree->dreg != X86_EAX && sreg != X86_EAX)
-                       x86_pop_reg (code, X86_EAX);
-       }
-#endif
-       return code;
-}
-
 typedef struct {
        guchar *code;
-       guchar *target;
+       const guchar *target;
        int absolute;
        int found;
 } PatchData;
@@ -2117,7 +2425,7 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) {
        guint32 *endthunks = (guint32*)(code + bsize);
        guint32 load [2];
        guchar *templ;
-       int i, count = 0;
+       int count = 0;
        int difflow, diffhigh;
 
        /* always ensure a call from pdata->code can reach to the thunks without further thunks */
@@ -2138,6 +2446,12 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) {
                                ppc_patch (pdata->code, (guchar*)thunks);
                                mono_arch_flush_icache (pdata->code, 4);
                                pdata->found = 1;
+                               /*{
+                                       static int num_thunks = 0;
+                                       num_thunks++;
+                                       if ((num_thunks % 20) == 0)
+                                               g_print ("num_thunks lookup: %d\n", num_thunks);
+                               }*/
                                return 1;
                        } else if ((thunks [0] == 0) && (thunks [1] == 0)) {
                                /* found a free slot instead: emit thunk */
@@ -2151,6 +2465,12 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) {
                                ppc_patch (pdata->code, (guchar*)thunks);
                                mono_arch_flush_icache (pdata->code, 4);
                                pdata->found = 1;
+                               /*{
+                                       static int num_thunks = 0;
+                                       num_thunks++;
+                                       if ((num_thunks % 20) == 0)
+                                               g_print ("num_thunks: %d\n", num_thunks);
+                               }*/
                                return 1;
                        }
                        /* skip 16 bytes, the size of the thunk */
@@ -2163,7 +2483,7 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) {
 }
 
 static void
-handle_thunk (int absolute, guchar *code, guchar *target) {
+handle_thunk (int absolute, guchar *code, const guchar *target) {
        MonoDomain *domain = mono_domain_get ();
        PatchData pdata;
 
@@ -2188,7 +2508,7 @@ handle_thunk (int absolute, guchar *code, guchar *target) {
 }
 
 void
-ppc_patch (guchar *code, guchar *target)
+ppc_patch (guchar *code, const guchar *target)
 {
        guint32 ins = *(guint32*)code;
        guint32 prim = ins >> 26;
@@ -2255,16 +2575,169 @@ ppc_patch (guchar *code, guchar *target)
                        ins |= diff;
                }
                *(guint32*)code = ins;
+               return;
+       }
+
+       if (prim == 15 || ins == 0x4e800021 || ins == 0x4e800020 || ins == 0x4e800420) {
+               guint32 *seq;
+               /* the trampoline code will try to patch the blrl, blr, bcctr */
+               if (ins == 0x4e800021 || ins == 0x4e800020 || ins == 0x4e800420) {
+                       code -= 12;
+               }
+               /* this is the lis/ori/mtlr/blrl sequence */
+               seq = (guint32*)code;
+               g_assert ((seq [0] >> 26) == 15);
+               g_assert ((seq [1] >> 26) == 24);
+               g_assert ((seq [2] >> 26) == 31);
+               g_assert (seq [3] == 0x4e800021 || seq [3] == 0x4e800020 || seq [3] == 0x4e800420);
+               /* FIXME: make this thread safe */
+               ppc_lis (code, ppc_r0, (guint32)(target) >> 16);
+               ppc_ori (code, ppc_r0, ppc_r0, (guint32)(target) & 0xffff);
+               mono_arch_flush_icache (code - 8, 8);
        } else {
                g_assert_not_reached ();
        }
 //     g_print ("patched with 0x%08x\n", ins);
 }
 
+/* After a float call (OP_FCALL*), emit an fmr from ppc_f1 into ins->dreg if they differ. */
+static guint8*
+emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
+{
+       const int is_float_call = ins->opcode == OP_FCALL ||
+               ins->opcode == OP_FCALL_REG ||
+               ins->opcode == OP_FCALL_MEMBASE;
+
+       /* every other opcode emits nothing here */
+       if (is_float_call && ins->dreg != ppc_f1) {
+               ppc_fmr (code, ins->dreg, ppc_f1);
+       }
+       return code;
+}
+
+/*
+ * emit_load_volatile_arguments:
+ *
+ *  Reload the arguments (and any struct-return address) from their stack slots into
+ * the registers calculate_sizes () assigns them. Required before a tail call. Returns code.
+ */
+static guint8*
+emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
+{
+       MonoMethod *method = cfg->method;
+       MonoMethodSignature *sig;
+       MonoInst *inst;
+       CallInfo *cinfo;
+       guint32 i, j, pos;
+       int struct_index = 0;
+
+       /* FIXME: Generate intermediate code instead */
+
+       sig = mono_method_signature (method);
+
+       /* This is the opposite of the code in emit_prolog */
+
+       pos = 0;
+
+       cinfo = calculate_sizes (sig, sig->pinvoke);
+       /* For a struct return, reload the saved return-buffer address (cfg->vret_addr) first */
+       if (MONO_TYPE_ISSTRUCT (sig->ret)) {
+               ArgInfo *ainfo = &cinfo->ret;
+               inst = cfg->vret_addr;
+               g_assert (ppc_is_imm16 (inst->inst_offset));
+               ppc_lwz (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+       }
+       for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
+               ArgInfo *ainfo = cinfo->args + i;
+               inst = cfg->args [pos];
+
+               g_assert (inst->opcode != OP_REGVAR);
+               g_assert (ppc_is_imm16 (inst->inst_offset));
+
+               switch (ainfo->regtype) {
+               case RegTypeGeneral:
+                       switch (ainfo->size) {
+                               case 1:
+                                       ppc_lbz (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       break;
+                               case 2:
+                                       ppc_lhz (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       break;
+                               default:
+                                       ppc_lwz (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       break;
+                       }
+                       break;
+
+               case RegTypeFP:
+                       switch (ainfo->size) {
+                               case 4:
+                                       ppc_lfs (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       break;
+                               case 8:
+                                       ppc_lfd (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       break;
+                               default:
+                                       g_assert_not_reached ();
+                       }
+                       break;
+
+               case RegTypeBase:
+                       /* FIXME: */
+                       NOT_IMPLEMENTED;
+
+               case RegTypeStructByVal: {
+                       guint32 size = 0;
+
+                       /* FIXME: */
+                       if (ainfo->vtsize)
+                               NOT_IMPLEMENTED;
+#ifdef __APPLE__
+                       /*
+                        * Darwin pinvokes needs some special handling
+                        * for 1 and 2 byte arguments
+                        */
+                       if (method->signature->pinvoke)
+                               size = mono_class_native_size (inst->klass, NULL);
+                       if (size == 1 || size == 2) {
+                               /* FIXME: */
+                               NOT_IMPLEMENTED;
+                       } else
+#endif
+                               for (j = 0; j < ainfo->size; ++j) { /* own index: i is the outer argument counter */
+                                       ppc_lwz (code, ainfo->reg  + j,
+                                               inst->inst_offset + j * sizeof (gpointer), inst->inst_basereg);
+                               }
+                       break;
+               }
+
+               case RegTypeStructByAddr: {
+                       MonoInst *addr = cfg->tailcall_valuetype_addrs [struct_index];
+
+                       g_assert (ppc_is_imm16 (addr->inst_offset));
+                       g_assert (!ainfo->offset);
+                       ppc_lwz (code, ainfo->reg, addr->inst_offset, addr->inst_basereg);
+
+                       struct_index++;
+                       break;
+               }
+
+               default:
+                       g_assert_not_reached ();
+               }
+
+               pos ++;
+       }
+
+       g_free (cinfo);
+
+       return code;
+}
+
 void
 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 {
-       MonoInst *ins;
+       MonoInst *ins, *next;
        MonoCallInst *call;
        guint offset;
        guint8 *code = cfg->native_code + cfg->code_len;
@@ -2272,9 +2745,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
        guint last_offset = 0;
        int max_len, cpos;
 
-       if (cfg->opt & MONO_OPT_PEEPHOLE)
-               peephole_pass (cfg, bb);
-
        /* we don't align basic blocks of loops on ppc */
 
        if (cfg->verbose_level > 2)
@@ -2293,11 +2763,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                //x86_inc_mem (code, &cov->data [bb->dfn].count); 
        }
 
-       ins = bb->code;
-       while (ins) {
+       MONO_BB_FOR_EACH_INS (bb, ins) {
                offset = code - cfg->native_code;
 
-               max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
+               max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
 
                if (offset > (cfg->code_size - max_len - 16)) {
                        cfg->code_size *= 2;
@@ -2309,56 +2778,43 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                mono_debug_record_line_number (cfg, ins, offset);
 
                switch (ins->opcode) {
+               case OP_RELAXED_NOP:
+               case OP_NOP:
+               case OP_DUMMY_USE:
+               case OP_DUMMY_STORE:
+               case OP_NOT_REACHED:
+               case OP_NOT_NULL:
+                       break;
+               case OP_TLS_GET:
+                       emit_tls_access (code, ins->dreg, ins->inst_offset);
+                       break;
                case OP_BIGMUL:
-                       ppc_mullw (code, ppc_r4, ins->sreg1, ins->sreg2);
+                       ppc_mullw (code, ppc_r0, ins->sreg1, ins->sreg2);
                        ppc_mulhw (code, ppc_r3, ins->sreg1, ins->sreg2);
+                       ppc_mr (code, ppc_r4, ppc_r0);
                        break;
                case OP_BIGMUL_UN:
-                       ppc_mullw (code, ppc_r4, ins->sreg1, ins->sreg2);
+                       ppc_mullw (code, ppc_r0, ins->sreg1, ins->sreg2);
                        ppc_mulhwu (code, ppc_r3, ins->sreg1, ins->sreg2);
+                       ppc_mr (code, ppc_r4, ppc_r0);
                        break;
-               case OP_STOREI1_MEMBASE_IMM:
-                       ppc_li (code, ppc_r0, ins->inst_imm);
-                       if (ppc_is_imm16 (ins->inst_offset)) {
-                               ppc_stb (code, ppc_r0, ins->inst_offset, ins->inst_destbasereg);
-                       } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_stbx (code, ppc_r0, ppc_r11, ins->inst_destbasereg);
-                       }
-                       break;
-               case OP_STOREI2_MEMBASE_IMM:
-                       ppc_li (code, ppc_r0, ins->inst_imm);
-                       if (ppc_is_imm16 (ins->inst_offset)) {
-                               ppc_sth (code, ppc_r0, ins->inst_offset, ins->inst_destbasereg);
-                       } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_sthx (code, ppc_r0, ppc_r11, ins->inst_destbasereg);
-                       }
-                       break;
-               case OP_STORE_MEMBASE_IMM:
-               case OP_STOREI4_MEMBASE_IMM:
-                       ppc_load (code, ppc_r0, ins->inst_imm);
-                       if (ppc_is_imm16 (ins->inst_offset)) {
-                               ppc_stw (code, ppc_r0, ins->inst_offset, ins->inst_destbasereg);
-                       } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_stwx (code, ppc_r0, ppc_r11, ins->inst_destbasereg);
-                       }
+               case OP_MEMORY_BARRIER:
+                       ppc_sync (code);
                        break;
                case OP_STOREI1_MEMBASE_REG:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_stb (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_stbx (code, ins->sreg1, ppc_r11, ins->inst_destbasereg);
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_stbx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
                        }
                        break;
                case OP_STOREI2_MEMBASE_REG:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_sth (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_sthx (code, ins->sreg1, ppc_r11, ins->inst_destbasereg);
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_sthx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
                        }
                        break;
                case OP_STORE_MEMBASE_REG:
@@ -2366,20 +2822,22 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_stw (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_stwx (code, ins->sreg1, ppc_r11, ins->inst_destbasereg);
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_stwx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
                        }
                        break;
-               case CEE_LDIND_I:
-               case CEE_LDIND_I4:
-               case CEE_LDIND_U4:
-                       g_assert_not_reached ();
-                       //x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
+               case OP_STOREI1_MEMINDEX:
+                       ppc_stbx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       break;
+               case OP_STOREI2_MEMINDEX:
+                       ppc_sthx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       break;
+               case OP_STORE_MEMINDEX:
+               case OP_STOREI4_MEMINDEX:
+                       ppc_stwx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
                        break;
                case OP_LOADU4_MEM:
                        g_assert_not_reached ();
-                       //x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
-                       //x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
                        break;
                case OP_LOAD_MEMBASE:
                case OP_LOADI4_MEMBASE:
@@ -2387,8 +2845,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lwz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_lwzx (code, ins->dreg, ppc_r11, ins->inst_basereg);
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_lwzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
                        }
                        break;
                case OP_LOADI1_MEMBASE:
@@ -2396,8 +2854,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lbz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_lbzx (code, ins->dreg, ppc_r11, ins->inst_basereg);
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_lbzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
                        }
                        if (ins->opcode == OP_LOADI1_MEMBASE)
                                ppc_extsb (code, ins->dreg, ins->dreg);
@@ -2406,95 +2864,103 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lhz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_lhzx (code, ins->dreg, ppc_r11, ins->inst_basereg);
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_lhzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
                        }
                        break;
                case OP_LOADI2_MEMBASE:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lha (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_lhax (code, ins->dreg, ppc_r11, ins->inst_basereg);
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_lhax (code, ins->dreg, ins->inst_basereg, ppc_r0);
                        }
                        break;
-               case CEE_CONV_I1:
+               case OP_LOAD_MEMINDEX:
+               case OP_LOADI4_MEMINDEX:
+               case OP_LOADU4_MEMINDEX:
+                       ppc_lwzx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       break;
+               case OP_LOADU2_MEMINDEX:
+                       ppc_lhzx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       break;
+               case OP_LOADI2_MEMINDEX:
+                       ppc_lhax (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       break;
+               case OP_LOADU1_MEMINDEX:
+                       ppc_lbzx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       break;
+               case OP_LOADI1_MEMINDEX:
+                       ppc_lbzx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_extsb (code, ins->dreg, ins->dreg);
+                       break;
+               case OP_ICONV_TO_I1:
                        ppc_extsb (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_CONV_I2:
+               case OP_ICONV_TO_I2:
                        ppc_extsh (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_CONV_U1:
+               case OP_ICONV_TO_U1:
                        ppc_rlwinm (code, ins->dreg, ins->sreg1, 0, 24, 31);
                        break;
-               case CEE_CONV_U2:
+               case OP_ICONV_TO_U2:
                        ppc_rlwinm (code, ins->dreg, ins->sreg1, 0, 16, 31);
                        break;
                case OP_COMPARE:
-                       if (ins->next && 
-                                       ((ins->next->opcode >= CEE_BNE_UN && ins->next->opcode <= CEE_BLT_UN) ||
-                                       (ins->next->opcode >= OP_COND_EXC_NE_UN && ins->next->opcode <= OP_COND_EXC_LT_UN) ||
-                                       (ins->next->opcode == OP_CLT_UN || ins->next->opcode == OP_CGT_UN)))
+               case OP_ICOMPARE:
+                       next = ins->next;
+                       if (next && compare_opcode_is_unsigned (next->opcode))
                                ppc_cmpl (code, 0, 0, ins->sreg1, ins->sreg2);
                        else
                                ppc_cmp (code, 0, 0, ins->sreg1, ins->sreg2);
                        break;
                case OP_COMPARE_IMM:
-                       if (ins->next && 
-                                       ((ins->next->opcode >= CEE_BNE_UN && ins->next->opcode <= CEE_BLT_UN) ||
-                                       (ins->next->opcode >= OP_COND_EXC_NE_UN && ins->next->opcode <= OP_COND_EXC_LT_UN) ||
-                                       (ins->next->opcode == OP_CLT_UN || ins->next->opcode == OP_CGT_UN))) {
+               case OP_ICOMPARE_IMM:
+                       next = ins->next;
+                       if (next && compare_opcode_is_unsigned (next->opcode)) {
                                if (ppc_is_uimm16 (ins->inst_imm)) {
                                        ppc_cmpli (code, 0, 0, ins->sreg1, (ins->inst_imm & 0xffff));
                                } else {
-                                       ppc_load (code, ppc_r11, ins->inst_imm);
-                                       ppc_cmpl (code, 0, 0, ins->sreg1, ppc_r11);
+                                       g_assert_not_reached ();
                                }
                        } else {
                                if (ppc_is_imm16 (ins->inst_imm)) {
                                        ppc_cmpi (code, 0, 0, ins->sreg1, (ins->inst_imm & 0xffff));
                                } else {
-                                       ppc_load (code, ppc_r11, ins->inst_imm);
-                                       ppc_cmp (code, 0, 0, ins->sreg1, ppc_r11);
+                                       g_assert_not_reached ();
                                }
                        }
                        break;
-               case OP_X86_TEST_NULL:
-                       ppc_cmpi (code, 0, 0, ins->sreg1, 0);
-                       break;
-               case CEE_BREAK:
+               case OP_BREAK:
                        ppc_break (code);
                        break;
                case OP_ADDCC:
+               case OP_IADDCC:
                        ppc_addc (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
-               case CEE_ADD:
+               case OP_IADD:
                        ppc_add (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_ADC:
+               case OP_IADC:
                        ppc_adde (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_ADDCC_IMM:
                        if (ppc_is_imm16 (ins->inst_imm)) {
                                ppc_addic (code, ins->dreg, ins->sreg1, ins->inst_imm);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_imm);
-                               ppc_addc (code, ins->dreg, ins->sreg1, ppc_r11);
+                               g_assert_not_reached ();
                        }
                        break;
                case OP_ADD_IMM:
+               case OP_IADD_IMM:
                        if (ppc_is_imm16 (ins->inst_imm)) {
                                ppc_addi (code, ins->dreg, ins->sreg1, ins->inst_imm);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_imm);
-                               ppc_add (code, ins->dreg, ins->sreg1, ppc_r11);
+                               g_assert_not_reached ();
                        }
                        break;
-               case OP_ADC_IMM:
-                       ppc_load (code, ppc_r11, ins->inst_imm);
-                       ppc_adde (code, ins->dreg, ins->sreg1, ppc_r11);
-                       break;
-               case CEE_ADD_OVF:
+               case OP_IADD_OVF:
                        /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
                         */
                        ppc_addo (code, ins->dreg, ins->sreg1, ins->sreg2);
@@ -2502,7 +2968,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
                        break;
-               case CEE_ADD_OVF_UN:
+               case OP_IADD_OVF_UN:
                        /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
                         */
                        ppc_addco (code, ins->dreg, ins->sreg1, ins->sreg2);
@@ -2510,7 +2976,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_andisd (code, ppc_r0, ppc_r0, (1<<13));
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
                        break;
-               case CEE_SUB_OVF:
+               case OP_ISUB_OVF:
                        /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
                         */
                        ppc_subfo (code, ins->dreg, ins->sreg2, ins->sreg1);
@@ -2518,7 +2984,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
                        break;
-               case CEE_SUB_OVF_UN:
+               case OP_ISUB_OVF_UN:
                        /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
                         */
                        ppc_subfc (code, ins->dreg, ins->sreg2, ins->sreg1);
@@ -2559,31 +3025,25 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_EQ, "OverflowException");
                        break;
                case OP_SUBCC:
+               case OP_ISUBCC:
                        ppc_subfc (code, ins->dreg, ins->sreg2, ins->sreg1);
                        break;
-               case OP_SUBCC_IMM:
-                       ppc_load (code, ppc_r11, ins->inst_imm);
-                       ppc_subfc (code, ins->dreg, ppc_r11, ins->sreg1);
-                       break;
-               case CEE_SUB:
+               case OP_ISUB:
                        ppc_subf (code, ins->dreg, ins->sreg2, ins->sreg1);
                        break;
                case OP_SBB:
+               case OP_ISBB:
                        ppc_subfe (code, ins->dreg, ins->sreg2, ins->sreg1);
                        break;
                case OP_SUB_IMM:
+               case OP_ISUB_IMM:
                        // we add the negated value
                        if (ppc_is_imm16 (-ins->inst_imm))
                                ppc_addi (code, ins->dreg, ins->sreg1, -ins->inst_imm);
                        else {
-                               ppc_load (code, ppc_r11, ins->inst_imm);
-                               ppc_sub (code, ins->dreg, ins->sreg1, ppc_r11);
+                               g_assert_not_reached ();
                        }
                        break;
-               case OP_SBB_IMM:
-                       ppc_load (code, ppc_r11, ins->inst_imm);
-                       ppc_subfe (code, ins->dreg, ppc_r11, ins->sreg1);
-                       break;
                case OP_PPC_SUBFIC:
                        g_assert (ppc_is_imm16 (ins->inst_imm));
                        ppc_subfic (code, ins->dreg, ins->sreg1, ins->inst_imm);
@@ -2591,29 +3051,29 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_PPC_SUBFZE:
                        ppc_subfze (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_AND:
+               case OP_IAND:
                        /* FIXME: the ppc macros are inconsistent here: put dest as the first arg! */
                        ppc_and (code, ins->sreg1, ins->dreg, ins->sreg2);
                        break;
                case OP_AND_IMM:
+               case OP_IAND_IMM:
                        if (!(ins->inst_imm & 0xffff0000)) {
                                ppc_andid (code, ins->sreg1, ins->dreg, ins->inst_imm);
                        } else if (!(ins->inst_imm & 0xffff)) {
                                ppc_andisd (code, ins->sreg1, ins->dreg, ((guint32)ins->inst_imm >> 16));
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_imm);
-                               ppc_and (code, ins->sreg1, ins->dreg, ppc_r11);
+                               g_assert_not_reached ();
                        }
                        break;
-               case CEE_DIV: {
-                       guint32 *divisor_is_m1;
+               case OP_IDIV: {
+                       guint8 *divisor_is_m1;
                          /* XER format: SO, OV, CA, reserved [21 bits], count [8 bits]
                          */
                        ppc_cmpi (code, 0, 0, ins->sreg2, -1);
                        divisor_is_m1 = code;
                        ppc_bc (code, PPC_BR_FALSE | PPC_BR_LIKELY, PPC_BR_EQ, 0);
-                       ppc_lis (code, ppc_r11, 0x8000);
-                       ppc_cmp (code, 0, 0, ins->sreg1, ppc_r11);
+                       ppc_lis (code, ppc_r0, 0x8000);
+                       ppc_cmp (code, 0, 0, ins->sreg1, ppc_r0);
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_EQ, "ArithmeticException");
                        ppc_patch (divisor_is_m1, code);
                         /* XER format: SO, OV, CA, reserved [21 bits], count [8 bits]
@@ -2624,120 +3084,85 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
                        break;
                }
-               case CEE_DIV_UN:
+               case OP_IDIV_UN:
                        ppc_divwuod (code, ins->dreg, ins->sreg1, ins->sreg2);
                        ppc_mfspr (code, ppc_r0, ppc_xer);
                        ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
                        break;
                case OP_DIV_IMM:
-                       g_assert_not_reached ();
-#if 0
-                       ppc_load (code, ppc_r11, ins->inst_imm);
-                       ppc_divwod (code, ins->dreg, ins->sreg1, ppc_r11);
-                       ppc_mfspr (code, ppc_r0, ppc_xer);
-                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
-                       /* FIXME: use OverflowException for 0x80000000/-1 */
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
-                       break;
-#endif
-               case CEE_REM: {
-                       guint32 *divisor_is_m1;
-                       ppc_cmpi (code, 0, 0, ins->sreg2, -1);
-                       divisor_is_m1 = code;
-                       ppc_bc (code, PPC_BR_FALSE | PPC_BR_LIKELY, PPC_BR_EQ, 0);
-                       ppc_lis (code, ppc_r11, 0x8000);
-                       ppc_cmp (code, 0, 0, ins->sreg1, ppc_r11);
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_EQ, "ArithmeticException");
-                       ppc_patch (divisor_is_m1, code);
-                       ppc_divwod (code, ppc_r11, ins->sreg1, ins->sreg2);
-                       ppc_mfspr (code, ppc_r0, ppc_xer);
-                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
-                       /* FIXME: use OverflowException for 0x80000000/-1 */
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
-                       ppc_mullw (code, ppc_r11, ppc_r11, ins->sreg2);
-                       ppc_subf (code, ins->dreg, ppc_r11, ins->sreg1);
-                       break;
-               }
-               case CEE_REM_UN:
-                       ppc_divwuod (code, ppc_r11, ins->sreg1, ins->sreg2);
-                       ppc_mfspr (code, ppc_r0, ppc_xer);
-                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
-                       ppc_mullw (code, ppc_r11, ppc_r11, ins->sreg2);
-                       ppc_subf (code, ins->dreg, ppc_r11, ins->sreg1);
-                       break;
+               case OP_IREM:
+               case OP_IREM_UN:
                case OP_REM_IMM:
                        g_assert_not_reached ();
-               case CEE_OR:
+               case OP_IOR:
                        ppc_or (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_OR_IMM:
+               case OP_IOR_IMM:
                        if (!(ins->inst_imm & 0xffff0000)) {
                                ppc_ori (code, ins->sreg1, ins->dreg, ins->inst_imm);
                        } else if (!(ins->inst_imm & 0xffff)) {
-                               ppc_oris (code, ins->sreg1, ins->dreg, ((guint32)(ins->inst_imm) >> 16));
+                               ppc_oris (code, ins->dreg, ins->sreg1, ((guint32)(ins->inst_imm) >> 16));
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_imm);
-                               ppc_or (code, ins->sreg1, ins->dreg, ppc_r11);
+                               g_assert_not_reached ();
                        }
                        break;
-               case CEE_XOR:
+               case OP_IXOR:
                        ppc_xor (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
+               case OP_IXOR_IMM:
                case OP_XOR_IMM:
                        if (!(ins->inst_imm & 0xffff0000)) {
                                ppc_xori (code, ins->sreg1, ins->dreg, ins->inst_imm);
                        } else if (!(ins->inst_imm & 0xffff)) {
                                ppc_xoris (code, ins->sreg1, ins->dreg, ((guint32)(ins->inst_imm) >> 16));
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_imm);
-                               ppc_xor (code, ins->sreg1, ins->dreg, ppc_r11);
+                               g_assert_not_reached ();
                        }
                        break;
-               case CEE_SHL:
+               case OP_ISHL:
                        ppc_slw (code, ins->sreg1, ins->dreg, ins->sreg2);
                        break;
                case OP_SHL_IMM:
+               case OP_ISHL_IMM:
                        ppc_rlwinm (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f), 0, (31 - (ins->inst_imm & 0x1f)));
-                       //ppc_load (code, ppc_r11, ins->inst_imm);
-                       //ppc_slw (code, ins->sreg1, ins->dreg, ppc_r11);
                        break;
-               case CEE_SHR:
+               case OP_ISHR:
                        ppc_sraw (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_SHR_IMM:
-                       // there is also ppc_srawi
-                       //ppc_load (code, ppc_r11, ins->inst_imm);
-                       //ppc_sraw (code, ins->dreg, ins->sreg1, ppc_r11);
+               case OP_ISHR_IMM:
                        ppc_srawi (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
                        break;
                case OP_SHR_UN_IMM:
-                       /*ppc_load (code, ppc_r11, ins->inst_imm);
-                       ppc_srw (code, ins->dreg, ins->sreg1, ppc_r11);*/
-                       ppc_rlwinm (code, ins->dreg, ins->sreg1, (32 - (ins->inst_imm & 0x1f)), (ins->inst_imm & 0x1f), 31);
+               case OP_ISHR_UN_IMM:
+                       if (ins->inst_imm)
+                               ppc_rlwinm (code, ins->dreg, ins->sreg1, (32 - (ins->inst_imm & 0x1f)), (ins->inst_imm & 0x1f), 31);
+                       else
+                               ppc_mr (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_SHR_UN:
+               case OP_ISHR_UN:
                        ppc_srw (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
-               case CEE_NOT:
+               case OP_INOT:
                        ppc_not (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_NEG:
+               case OP_INEG:
                        ppc_neg (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_MUL:
+               case OP_IMUL:
                        ppc_mullw (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
+               case OP_IMUL_IMM:
                case OP_MUL_IMM:
                        if (ppc_is_imm16 (ins->inst_imm)) {
                            ppc_mulli (code, ins->dreg, ins->sreg1, ins->inst_imm);
                        } else {
-                           ppc_load (code, ppc_r11, ins->inst_imm);
-                           ppc_mullw (code, ins->dreg, ins->sreg1, ppc_r11);
+                           g_assert_not_reached ();
                        }
                        break;
-               case CEE_MUL_OVF:
+               case OP_IMUL_OVF:
                        /* we cannot use mcrxr, since it's not implemented on some processors 
                         * XER format: SO, OV, CA, reserved [21 bits], count [8 bits]
                         */
@@ -2746,7 +3171,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
                        break;
-               case CEE_MUL_OVF_UN:
+               case OP_IMUL_OVF_UN:
                        /* we first multiply to get the high word and compare to 0
                         * to set the flags, then the result is discarded and then 
                         * we multiply to get the lower bits of the result
@@ -2757,7 +3182,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_mullw (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_ICONST:
-               case OP_SETREGIMM:
                        ppc_load (code, ins->dreg, ins->inst_c0);
                        break;
                case OP_AOTCONST:
@@ -2765,10 +3189,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_lis (code, ins->dreg, 0);
                        ppc_ori (code, ins->dreg, ins->dreg, 0);
                        break;
-               case CEE_CONV_I4:
-               case CEE_CONV_U4:
+               case OP_ICONV_TO_I4:
+               case OP_ICONV_TO_U4:
                case OP_MOVE:
-               case OP_SETREG:
                        ppc_mr (code, ins->dreg, ins->sreg1);
                        break;
                case OP_SETLRET: {
@@ -2783,20 +3206,23 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                ppc_mr (code, ppc_r4, saved);
                        break;
                }
-               case OP_SETFREG:
                case OP_FMOVE:
                        ppc_fmr (code, ins->dreg, ins->sreg1);
                        break;
                case OP_FCONV_TO_R4:
                        ppc_frsp (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_JMP: {
+               case OP_JMP: {
                        int i, pos = 0;
                        
                        /*
                         * Keep in sync with mono_arch_emit_epilog
                         */
                        g_assert (!cfg->method->save_lmf);
+                       /*
+                        * Note: we can use ppc_r11 here because it is dead anyway:
+                        * we're leaving the method.
+                        */
                        if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
                                if (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET)) {
                                        ppc_lwz (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, cfg->frame_reg);
@@ -2806,6 +3232,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                }
                                ppc_mtlr (code, ppc_r0);
                        }
+
+                       code = emit_load_volatile_arguments (cfg, code);
+
                        if (ppc_is_imm16 (cfg->stack_usage)) {
                                ppc_addic (code, ppc_sp, cfg->frame_reg, cfg->stack_usage);
                        } else {
@@ -2838,48 +3267,64 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_ARGLIST: {
                        if (ppc_is_imm16 (cfg->sig_cookie + cfg->stack_usage)) {
-                               ppc_addi (code, ppc_r11, cfg->frame_reg, cfg->sig_cookie + cfg->stack_usage);
+                               ppc_addi (code, ppc_r0, cfg->frame_reg, cfg->sig_cookie + cfg->stack_usage);
                        } else {
-                               ppc_load (code, ppc_r11, cfg->sig_cookie + cfg->stack_usage);
-                               ppc_add (code, ppc_r11, cfg->frame_reg, ppc_r11);
+                               ppc_load (code, ppc_r0, cfg->sig_cookie + cfg->stack_usage);
+                               ppc_add (code, ppc_r0, cfg->frame_reg, ppc_r0);
                        }
-                       ppc_stw (code, ppc_r11, 0, ins->sreg1);
+                       ppc_stw (code, ppc_r0, 0, ins->sreg1);
                        break;
                }
                case OP_FCALL:
                case OP_LCALL:
                case OP_VCALL:
+               case OP_VCALL2:
                case OP_VOIDCALL:
-               case CEE_CALL:
+               case OP_CALL:
                        call = (MonoCallInst*)ins;
                        if (ins->flags & MONO_INST_HAS_METHOD)
                                mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD, call->method);
                        else
                                mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_ABS, call->fptr);
-                       ppc_bl (code, 0);
+                       if (FORCE_INDIR_CALL || cfg->method->dynamic) {
+                               ppc_lis (code, ppc_r0, 0);
+                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_mtlr (code, ppc_r0);
+                               ppc_blrl (code);
+                       } else {
+                               ppc_bl (code, 0);
+                       }
+                       /* FIXME: this should be handled somewhere else in the new jit */
+                       code = emit_move_return_value (cfg, ins, code);
                        break;
                case OP_FCALL_REG:
                case OP_LCALL_REG:
                case OP_VCALL_REG:
+               case OP_VCALL2_REG:
                case OP_VOIDCALL_REG:
                case OP_CALL_REG:
                        ppc_mtlr (code, ins->sreg1);
                        ppc_blrl (code);
+                       /* FIXME: this should be handled somewhere else in the new jit */
+                       code = emit_move_return_value (cfg, ins, code);
                        break;
                case OP_FCALL_MEMBASE:
                case OP_LCALL_MEMBASE:
                case OP_VCALL_MEMBASE:
+               case OP_VCALL2_MEMBASE:
                case OP_VOIDCALL_MEMBASE:
                case OP_CALL_MEMBASE:
                        ppc_lwz (code, ppc_r0, ins->inst_offset, ins->sreg1);
                        ppc_mtlr (code, ppc_r0);
                        ppc_blrl (code);
+                       /* FIXME: this should be handled somewhere else in the new jit */
+                       code = emit_move_return_value (cfg, ins, code);
                        break;
                case OP_OUTARG:
                        g_assert_not_reached ();
                        break;
                case OP_LOCALLOC: {
-                       guint32 * zero_loop_jump, zero_loop_start;
+                       guint8 * zero_loop_jump, * zero_loop_start;
                        /* keep alignment */
                        int alloca_waste = PPC_STACK_PARAM_OFFSET + cfg->param_area + 31;
                        int area_offset = alloca_waste;
@@ -2888,8 +3333,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_rlwinm (code, ppc_r11, ppc_r11, 0, 0, 27);
                        /* use ctr to store the number of words to 0 if needed */
                        if (ins->flags & MONO_INST_INIT) {
-                               /* we zero 4 bytes at a time */
-                               ppc_addi (code, ppc_r0, ins->sreg1, 3);
+                               /* we zero 4 bytes at a time:
+                                * we add 7 instead of 3 so that we set the counter to
+                                * at least 1, otherwise the bdnz instruction will make
+                                * it negative and iterate billions of times.
+                                */
+                               ppc_addi (code, ppc_r0, ins->sreg1, 7);
                                ppc_srawi (code, ppc_r0, ppc_r0, 2);
                                ppc_mtctr (code, ppc_r0);
                        }
@@ -2899,7 +3348,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        
                        if (ins->flags & MONO_INST_INIT) {
                                /* adjust the dest reg by -4 so we can use stwu */
-                               ppc_addi (code, ins->dreg, ppc_sp, (area_offset - 4));
+                               /* we actually adjust -8 because we let the loop
+                                * run at least once
+                                */
+                               ppc_addi (code, ins->dreg, ppc_sp, (area_offset - 8));
                                ppc_li (code, ppc_r11, 0);
                                zero_loop_start = code;
                                ppc_stwu (code, ppc_r11, 4, ins->dreg);
@@ -2910,15 +3362,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_addi (code, ins->dreg, ppc_sp, area_offset);
                        break;
                }
-               case CEE_RET:
-                       ppc_blr (code);
-                       break;
-               case CEE_THROW: {
+               case OP_THROW: {
                        //ppc_break (code);
                        ppc_mr (code, ppc_r3, ins->sreg1);
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                             (gpointer)"mono_arch_throw_exception");
-                       ppc_bl (code, 0);
+                       if (FORCE_INDIR_CALL || cfg->method->dynamic) {
+                               ppc_lis (code, ppc_r0, 0);
+                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_mtlr (code, ppc_r0);
+                               ppc_blrl (code);
+                       } else {
+                               ppc_bl (code, 0);
+                       }
                        break;
                }
                case OP_RETHROW: {
@@ -2926,35 +3382,48 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_mr (code, ppc_r3, ins->sreg1);
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                             (gpointer)"mono_arch_rethrow_exception");
-                       ppc_bl (code, 0);
+                       if (FORCE_INDIR_CALL || cfg->method->dynamic) {
+                               ppc_lis (code, ppc_r0, 0);
+                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_mtlr (code, ppc_r0);
+                               ppc_blrl (code);
+                       } else {
+                               ppc_bl (code, 0);
+                       }
                        break;
                }
-               case OP_START_HANDLER:
+               case OP_START_HANDLER: {
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
                        ppc_mflr (code, ppc_r0);
-                       if (ppc_is_imm16 (ins->inst_left->inst_offset)) {
-                               ppc_stw (code, ppc_r0, ins->inst_left->inst_offset, ins->inst_left->inst_basereg);
+                       if (ppc_is_imm16 (spvar->inst_offset)) {
+                               ppc_stw (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_left->inst_offset);
-                               ppc_stwx (code, ppc_r0, ppc_r11, ins->inst_left->inst_basereg);
+                               ppc_load (code, ppc_r11, spvar->inst_offset);
+                               ppc_stwx (code, ppc_r0, ppc_r11, spvar->inst_basereg);
                        }
                        break;
-               case OP_ENDFILTER:
+               }
+               case OP_ENDFILTER: {
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
                        if (ins->sreg1 != ppc_r3)
                                ppc_mr (code, ppc_r3, ins->sreg1);
-                       if (ppc_is_imm16 (ins->inst_left->inst_offset)) {
-                               ppc_lwz (code, ppc_r0, ins->inst_left->inst_offset, ins->inst_left->inst_basereg);
+                       if (ppc_is_imm16 (spvar->inst_offset)) {
+                               ppc_lwz (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_left->inst_offset);
-                               ppc_lwzx (code, ppc_r0, ins->inst_left->inst_basereg, ppc_r11);
+                               ppc_load (code, ppc_r11, spvar->inst_offset);
+                               ppc_lwzx (code, ppc_r0, spvar->inst_basereg, ppc_r11);
                        }
                        ppc_mtlr (code, ppc_r0);
                        ppc_blr (code);
                        break;
-               case CEE_ENDFINALLY:
-                       ppc_lwz (code, ppc_r0, ins->inst_left->inst_offset, ins->inst_left->inst_basereg);
+               }
+               case OP_ENDFINALLY: {
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+                       ppc_lwz (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg);
                        ppc_mtlr (code, ppc_r0);
                        ppc_blr (code);
                        break;
+               }
                case OP_CALL_HANDLER: 
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
                        ppc_bl (code, 0);
@@ -2962,10 +3431,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_LABEL:
                        ins->inst_c0 = code - cfg->native_code;
                        break;
-               case CEE_BR:
-                       //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
-                       //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
-                       //break;
+               case OP_BR:
                        if (ins->flags & MONO_INST_BRLABEL) {
                                /*if (ins->inst_i0->inst_c0) {
                                        ppc_b (code, 0);
@@ -2989,18 +3455,23 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_bcctr (code, PPC_BR_ALWAYS, 0);
                        break;
                case OP_CEQ:
+               case OP_ICEQ:
                        ppc_li (code, ins->dreg, 0);
                        ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 2);
                        ppc_li (code, ins->dreg, 1);
                        break;
                case OP_CLT:
                case OP_CLT_UN:
+               case OP_ICLT:
+               case OP_ICLT_UN:
                        ppc_li (code, ins->dreg, 1);
                        ppc_bc (code, PPC_BR_TRUE, PPC_BR_LT, 2);
                        ppc_li (code, ins->dreg, 0);
                        break;
                case OP_CGT:
                case OP_CGT_UN:
+               case OP_ICGT:
+               case OP_ICGT_UN:
                        ppc_li (code, ins->dreg, 1);
                        ppc_bc (code, PPC_BR_TRUE, PPC_BR_GT, 2);
                        ppc_li (code, ins->dreg, 0);
@@ -3017,6 +3488,18 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_COND_EXC_LE_UN:
                        EMIT_COND_SYSTEM_EXCEPTION (ins->opcode - OP_COND_EXC_EQ, ins->inst_p1);
                        break;
+               case OP_COND_EXC_IEQ:
+               case OP_COND_EXC_INE_UN:
+               case OP_COND_EXC_ILT:
+               case OP_COND_EXC_ILT_UN:
+               case OP_COND_EXC_IGT:
+               case OP_COND_EXC_IGT_UN:
+               case OP_COND_EXC_IGE:
+               case OP_COND_EXC_IGE_UN:
+               case OP_COND_EXC_ILE:
+               case OP_COND_EXC_ILE_UN:
+                       EMIT_COND_SYSTEM_EXCEPTION (ins->opcode - OP_COND_EXC_IEQ, ins->inst_p1);
+                       break;
                case OP_COND_EXC_C:
                        /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
                         */
@@ -3032,93 +3515,73 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_COND_EXC_NO:
                        g_assert_not_reached ();
                        break;
-               case CEE_BEQ:
-               case CEE_BNE_UN:
-               case CEE_BLT:
-               case CEE_BLT_UN:
-               case CEE_BGT:
-               case CEE_BGT_UN:
-               case CEE_BGE:
-               case CEE_BGE_UN:
-               case CEE_BLE:
-               case CEE_BLE_UN:
-                       EMIT_COND_BRANCH (ins, ins->opcode - CEE_BEQ);
+               case OP_IBEQ:
+               case OP_IBNE_UN:
+               case OP_IBLT:
+               case OP_IBLT_UN:
+               case OP_IBGT:
+               case OP_IBGT_UN:
+               case OP_IBGE:
+               case OP_IBGE_UN:
+               case OP_IBLE:
+               case OP_IBLE_UN:
+                       EMIT_COND_BRANCH (ins, ins->opcode - OP_IBEQ);
                        break;
 
                /* floating point opcodes */
                case OP_R8CONST:
-                       ppc_load (code, ppc_r11, ins->inst_p0);
-                       ppc_lfd (code, ins->dreg, 0, ppc_r11);
-                       break;
                case OP_R4CONST:
-                       ppc_load (code, ppc_r11, ins->inst_p0);
-                       ppc_lfs (code, ins->dreg, 0, ppc_r11);
-                       break;
+                       g_assert_not_reached ();
                case OP_STORER8_MEMBASE_REG:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_stfd (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_stfdx (code, ins->sreg1, ppc_r11, ins->inst_destbasereg);
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_stfdx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
                        }
                        break;
                case OP_LOADR8_MEMBASE:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lfd (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_lfdx (code, ins->dreg, ppc_r11, ins->inst_basereg);
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_lfdx (code, ins->dreg, ins->inst_destbasereg, ppc_r0);
                        }
                        break;
                case OP_STORER4_MEMBASE_REG:
+                       ppc_frsp (code, ins->sreg1, ins->sreg1);
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_stfs (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_stfsx (code, ins->sreg1, ppc_r11, ins->inst_destbasereg);
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_stfsx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
                        }
                        break;
                case OP_LOADR4_MEMBASE:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lfs (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r11, ins->inst_offset);
-                               ppc_lfsx (code, ins->dreg, ppc_r11, ins->inst_basereg);
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_lfsx (code, ins->dreg, ins->inst_destbasereg, ppc_r0);
                        }
                        break;
-               case CEE_CONV_R_UN: {
-                       static const guint64 adjust_val = 0x4330000000000000ULL;
-                       ppc_addis (code, ppc_r0, ppc_r0, 0x4330);
-                       ppc_stw (code, ppc_r0, -8, ppc_sp);
-                       ppc_stw (code, ins->sreg1, -4, ppc_sp);
-                       ppc_load (code, ppc_r11, &adjust_val);
-                       ppc_lfd (code, ins->dreg, -8, ppc_sp);
-                       ppc_lfd (code, ppc_f0, 0, ppc_r11);
-                       ppc_fsub (code, ins->dreg, ins->dreg, ppc_f0);
+               case OP_LOADR4_MEMINDEX:
+                       ppc_lfsx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
                        break;
-               }
-               case CEE_CONV_R4: /* FIXME: change precision */
-               case CEE_CONV_R8: {
-                       static const guint64 adjust_val = 0x4330000080000000ULL;
-                       // addis is special for ppc_r0
-                       ppc_addis (code, ppc_r0, ppc_r0, 0x4330);
-                       ppc_stw (code, ppc_r0, -8, ppc_sp);
-                       ppc_xoris (code, ins->sreg1, ppc_r11, 0x8000);
-                       ppc_stw (code, ppc_r11, -4, ppc_sp);
-                       ppc_lfd (code, ins->dreg, -8, ppc_sp);
-                       ppc_load (code, ppc_r11, &adjust_val);
-                       ppc_lfd (code, ppc_f0, 0, ppc_r11);
-                       ppc_fsub (code, ins->dreg, ins->dreg, ppc_f0);
-                       break;
-               }
-               case OP_X86_FP_LOAD_I8:
-                       g_assert_not_reached ();
-                       /*x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);*/
+               case OP_LOADR8_MEMINDEX:
+                       ppc_lfdx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
                        break;
-               case OP_X86_FP_LOAD_I4:
-                       g_assert_not_reached ();
-                       /*x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);*/
+               case OP_STORER4_MEMINDEX:
+                       ppc_frsp (code, ins->sreg1, ins->sreg1);
+                       ppc_stfsx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
                        break;
+               case OP_STORER8_MEMINDEX:
+                       ppc_stfdx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       break;
+               case CEE_CONV_R_UN:
+               case CEE_CONV_R4: /* FIXME: change precision */
+               case CEE_CONV_R8:
+                       g_assert_not_reached ();
                case OP_FCONV_TO_I1:
                        code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, TRUE);
                        break;
@@ -3148,8 +3611,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert_not_reached ();
                        /* Implemented as helper calls */
                        break;
+               case OP_LCONV_TO_OVF_I4_2:
                case OP_LCONV_TO_OVF_I: {
-                       guint32 *negative_branch, *msword_positive_branch, *msword_negative_branch, *ovf_ex_target;
+                       guint8 *negative_branch, *msword_positive_branch, *msword_negative_branch, *ovf_ex_target;
                        // Check if its negative
                        ppc_cmpi (code, 0, 0, ins->sreg1, 0);
                        negative_branch = code;
@@ -3196,7 +3660,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        g_assert_not_reached ();
                        break;
                case OP_FCOMPARE:
-                       ppc_fcmpo (code, 0, ins->sreg1, ins->sreg2);
+                       ppc_fcmpu (code, 0, ins->sreg1, ins->sreg2);
                        break;
                case OP_FCEQ:
                        ppc_fcmpo (code, 0, ins->sreg1, ins->sreg2);
@@ -3237,6 +3701,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_BRANCH (ins, CEE_BNE_UN - CEE_BEQ);
                        break;
                case OP_FBLT:
+                       ppc_bc (code, PPC_BR_TRUE, PPC_BR_SO, 2);
                        EMIT_COND_BRANCH (ins, CEE_BLT - CEE_BEQ);
                        break;
                case OP_FBLT_UN:
@@ -3244,6 +3709,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_BRANCH (ins, CEE_BLT_UN - CEE_BEQ);
                        break;
                case OP_FBGT:
+                       ppc_bc (code, PPC_BR_TRUE, PPC_BR_SO, 2);
                        EMIT_COND_BRANCH (ins, CEE_BGT - CEE_BEQ);
                        break;
                case OP_FBGT_UN:
@@ -3251,25 +3717,31 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_BRANCH (ins, CEE_BGT_UN - CEE_BEQ);
                        break;
                case OP_FBGE:
+                       ppc_bc (code, PPC_BR_TRUE, PPC_BR_SO, 2);
                        EMIT_COND_BRANCH (ins, CEE_BGE - CEE_BEQ);
                        break;
                case OP_FBGE_UN:
                        EMIT_COND_BRANCH (ins, CEE_BGE_UN - CEE_BEQ);
                        break;
                case OP_FBLE:
+                       ppc_bc (code, PPC_BR_TRUE, PPC_BR_SO, 2);
                        EMIT_COND_BRANCH (ins, CEE_BLE - CEE_BEQ);
                        break;
                case OP_FBLE_UN:
                        EMIT_COND_BRANCH (ins, CEE_BLE_UN - CEE_BEQ);
                        break;
-               case CEE_CKFINITE: {
-                       ppc_stfd (code, ins->sreg1, -8, ppc_sp);
-                       ppc_lwz (code, ppc_r11, -8, ppc_sp);
-                       ppc_rlwinm (code, ppc_r11, ppc_r11, 0, 1, 31);
-                       ppc_addis (code, ppc_r11, ppc_r11, -32752);
-                       ppc_rlwinmd (code, ppc_r11, ppc_r11, 1, 31, 31);
+               case OP_CKFINITE:
+                       g_assert_not_reached ();
+               case OP_CHECK_FINITE: {
+                       ppc_rlwinm (code, ins->sreg1, ins->sreg1, 0, 1, 31);
+                       ppc_addis (code, ins->sreg1, ins->sreg1, -32752);
+                       ppc_rlwinmd (code, ins->sreg1, ins->sreg1, 1, 31, 31);
                        EMIT_COND_SYSTEM_EXCEPTION (CEE_BEQ - CEE_BEQ, "ArithmeticException");
                        break;
+               case OP_JUMP_TABLE:
+                       mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
+                       ppc_load (code, ins->dreg, 0x0f0f0f0f);
+                       break;
                }
                default:
                        g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
@@ -3286,8 +3758,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
                last_ins = ins;
                last_offset = offset;
-               
-               ins = ins->next;
        }
 
        cfg->code_len = code - cfg->native_code;
@@ -3311,7 +3781,7 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
 
        for (patch_info = ji; patch_info; patch_info = patch_info->next) {
                unsigned char *ip = patch_info->ip.i + code;
-               const unsigned char *target;
+               unsigned char *target;
 
                target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
 
@@ -3324,15 +3794,13 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
                        *((gpointer *)(ip)) = code + patch_info->data.offset;
                        continue;
                case MONO_PATCH_INFO_SWITCH: {
-                       gpointer *table = (gpointer *)patch_info->data.target;
+                       gpointer *table = (gpointer *)patch_info->data.table->table;
                        int i;
 
-                       // FIXME: inspect code to get the register
-                       ppc_load (ip, ppc_r11, patch_info->data.target);
-                       //*((gconstpointer *)(ip + 2)) = patch_info->data.target;
+                       patch_lis_ori (ip, table);
 
-                       for (i = 0; i < patch_info->table_size; i++) {
-                               table [i] = (int)patch_info->data.table [i] + code;
+                       for (i = 0; i < patch_info->data.table->table_size; i++) { 
+                               table [i] = (int)patch_info->data.table->table [i] + code;
                        }
                        /* we put into the table the absolute address, no need for ppc_patch in this case */
                        continue;
@@ -3359,6 +3827,7 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
                        g_assert_not_reached ();
                        *((gconstpointer *)(ip + 1)) = patch_info->data.name;
                        continue;
+               case MONO_PATCH_INFO_NONE:
                case MONO_PATCH_INFO_BB_OVF:
                case MONO_PATCH_INFO_EXC_OVF:
                        /* everything is dealt with at epilog output time */
@@ -3370,39 +3839,6 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
        }
 }
 
-int
-mono_arch_max_epilog_size (MonoCompile *cfg)
-{
-       int max_epilog_size = 16 + 20*4;
-       MonoJumpInfo *patch_info;
-       
-       if (cfg->method->save_lmf)
-               max_epilog_size += 128;
-       
-       if (mono_jit_trace_calls != NULL)
-               max_epilog_size += 50;
-
-       if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
-               max_epilog_size += 50;
-
-       /* count the number of exception infos */
-     
-       /* 
-        * make sure we have enough space for exceptions
-        * 24 is the simulated call to throw_exception_by_name
-        */
-       for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
-               if (patch_info->type == MONO_PATCH_INFO_EXC)
-                       max_epilog_size += 24;
-               else if (patch_info->type == MONO_PATCH_INFO_BB_OVF)
-                       max_epilog_size += 12;
-               else if (patch_info->type == MONO_PATCH_INFO_EXC_OVF)
-                       max_epilog_size += 12;
-       }
-
-       return max_epilog_size;
-}
-
 /*
  * Stack frame layout:
  * 
@@ -3433,11 +3869,12 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        CallInfo *cinfo;
        int tracing = 0;
        int lmf_offset = 0;
+       int tailcall_struct_index;
 
        if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
                tracing = 1;
 
-       sig = method->signature;
+       sig = mono_method_signature (method);
        cfg->code_size = 256 + sig->param_count * 20;
        code = cfg->native_code = g_malloc (cfg->code_size);
 
@@ -3445,8 +3882,6 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                ppc_mflr (code, ppc_r0);
                ppc_stw (code, ppc_r0, PPC_RET_ADDR_OFFSET, ppc_sp);
        }
-       if (cfg->max_ireg >= 29)
-               cfg->used_int_regs |= USE_EXTRA_TEMPS;
 
        alloc_size = cfg->stack_offset;
        pos = 0;
@@ -3500,16 +3935,14 @@ mono_arch_emit_prolog (MonoCompile *cfg)
         */
        max_offset = 0;
        for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
-               MonoInst *ins = bb->code;
+               MonoInst *ins;
                bb->max_offset = max_offset;
 
                if (cfg->prof_options & MONO_PROFILE_COVERAGE)
                        max_offset += 6; 
 
-               while (ins) {
-                       max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
-                       ins = ins->next;
-               }
+               MONO_BB_FOR_EACH_INS (bb, ins)
+                       max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
        }
 
        /* load arguments allocated to register from the stack */
@@ -3519,7 +3952,13 @@ mono_arch_emit_prolog (MonoCompile *cfg)
 
        if (MONO_TYPE_ISSTRUCT (sig->ret)) {
                ArgInfo *ainfo = &cinfo->ret;
-               inst = cfg->ret;
+
+               if (cfg->new_ir)
+                       inst = cfg->vret_addr;
+               else
+                       inst = cfg->ret;
+               g_assert (inst);
+
                if (ppc_is_imm16 (inst->inst_offset)) {
                        ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                } else {
@@ -3527,9 +3966,11 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        ppc_stwx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
                }
        }
+
+       tailcall_struct_index = 0;
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ArgInfo *ainfo = cinfo->args + i;
-               inst = cfg->varinfo [pos];
+               inst = cfg->args [pos];
                
                if (cfg->verbose_level > 2)
                        g_print ("Saving argument %d (type: %d)\n", i, ainfo->regtype);
@@ -3641,19 +4082,23 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                int size = 0;
                                g_assert (ppc_is_imm16 (inst->inst_offset));
                                g_assert (ppc_is_imm16 (inst->inst_offset + ainfo->size * sizeof (gpointer)));
-                               if (mono_class_from_mono_type (inst->inst_vtype))
+                               /* FIXME: what if there is no class? */
+                               if (sig->pinvoke && mono_class_from_mono_type (inst->inst_vtype))
                                        size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), NULL);
                                for (cur_reg = 0; cur_reg < ainfo->size; ++cur_reg) {
-/*
-Darwin handles 1 and 2 byte structs specially by loading h/b into the arg
-register.  Should this case include linux/ppc?
-*/
 #if __APPLE__
+                                       /*
+                                        * Darwin handles 1 and 2 byte
+                                        * structs specially by
+                                        * loading h/b into the arg
+                                        * register.  Only done for
+                                        * pinvokes.
+                                        */
                                        if (size == 2)
                                                ppc_sth (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
                                        else if (size == 1)
                                                ppc_stb (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
-                                       else 
+                                       else
 #endif
                                                ppc_stw (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
                                        soffset += sizeof (gpointer);
@@ -3662,24 +4107,74 @@ register.  Should this case include linux/ppc?
                                if (ainfo->vtsize) {
                                        /* load the previous stack pointer in r11 (r0 gets overwritten by the memcpy) */
                                        ppc_lwz (code, ppc_r11, 0, ppc_sp);
-                                       /* FIXME: handle overrun! with struct sizes not multiple of 4 */
-                                       code = emit_memcpy (code, ainfo->vtsize * sizeof (gpointer), inst->inst_basereg, doffset, ppc_r11, ainfo->offset + soffset);
+                                       if ((size & 3) != 0) {
+                                               code = emit_memcpy (code, size - soffset,
+                                                       inst->inst_basereg, doffset,
+                                                       ppc_r11, ainfo->offset + soffset);
+                                       } else {
+                                               code = emit_memcpy (code, ainfo->vtsize * sizeof (gpointer),
+                                                       inst->inst_basereg, doffset,
+                                                       ppc_r11, ainfo->offset + soffset);
+                                       }
                                }
                        } else if (ainfo->regtype == RegTypeStructByAddr) {
+                               /* if it was originally a RegTypeBase */
+                               if (ainfo->offset) {
+                                       /* load the previous stack pointer in r11 */
+                                       ppc_lwz (code, ppc_r11, 0, ppc_sp);
+                                       ppc_lwz (code, ppc_r11, ainfo->offset, ppc_r11);
+                               } else {
+                                       ppc_mr (code, ppc_r11, ainfo->reg);
+                               }
+
+                               if (cfg->tailcall_valuetype_addrs) {
+                                       MonoInst *addr = cfg->tailcall_valuetype_addrs [tailcall_struct_index];
+
+                                       g_assert (ppc_is_imm16 (addr->inst_offset));
+                                       ppc_stw (code, ppc_r11, addr->inst_offset, addr->inst_basereg);
+
+                                       tailcall_struct_index++;
+                               }
+
                                g_assert (ppc_is_imm16 (inst->inst_offset));
-                               /* FIXME: handle overrun! with struct sizes not multiple of 4 */
-                               code = emit_memcpy (code, ainfo->vtsize * sizeof (gpointer), inst->inst_basereg, inst->inst_offset, ainfo->reg, 0);
+                               code = emit_memcpy (code, ainfo->vtsize, inst->inst_basereg, inst->inst_offset, ppc_r11, 0);
+                               /*g_print ("copy in %s: %d bytes from %d to offset: %d\n", method->name, ainfo->vtsize, ainfo->reg, inst->inst_offset);*/
                        } else
                                g_assert_not_reached ();
                }
                pos++;
        }
 
-       if (method->save_lmf) {
+       if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
+               ppc_load (code, ppc_r3, cfg->domain);
+               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
+               if (FORCE_INDIR_CALL || cfg->method->dynamic) {
+                       ppc_lis (code, ppc_r0, 0);
+                       ppc_ori (code, ppc_r0, ppc_r0, 0);
+                       ppc_mtlr (code, ppc_r0);
+                       ppc_blrl (code);
+               } else {
+                       ppc_bl (code, 0);
+               }
+       }
 
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
+       if (method->save_lmf) {
+               if (lmf_pthread_key != -1) {
+                       emit_tls_access (code, ppc_r3, lmf_pthread_key);
+                       if (G_STRUCT_OFFSET (MonoJitTlsData, lmf))
+                               ppc_addi (code, ppc_r3, ppc_r3, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
+               } else {
+                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                     (gpointer)"mono_get_lmf_addr");
-               ppc_bl (code, 0);
+                       if (FORCE_INDIR_CALL || cfg->method->dynamic) {
+                               ppc_lis (code, ppc_r0, 0);
+                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_mtlr (code, ppc_r0);
+                               ppc_blrl (code);
+                       } else {
+                               ppc_bl (code, 0);
+                       }
+               }
                /* we build the MonoLMF structure on the stack - see mini-ppc.h */
                /* lmf_offset is the offset from the previous stack pointer,
                 * alloc_size is the total stack space allocated, so the offset
@@ -3718,13 +4213,28 @@ register.  Should this case include linux/ppc?
 void
 mono_arch_emit_epilog (MonoCompile *cfg)
 {
-       MonoJumpInfo *patch_info;
        MonoMethod *method = cfg->method;
        int pos, i;
+       int max_epilog_size = 16 + 20*4;
        guint8 *code;
 
+       if (cfg->method->save_lmf)
+               max_epilog_size += 128;
+       
+       if (mono_jit_trace_calls != NULL)
+               max_epilog_size += 50;
+
+       if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
+               max_epilog_size += 50;
+
+       while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
+               cfg->code_size *= 2;
+               cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+               mono_jit_stats.code_reallocs++;
+       }
+
        /*
-        * Keep in sync with CEE_JMP
+        * Keep in sync with OP_JMP
         */
        code = cfg->native_code + cfg->code_len;
 
@@ -3794,11 +4304,83 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        }
        ppc_blr (code);
 
+       cfg->code_len = code - cfg->native_code;
+
+       g_assert (cfg->code_len < cfg->code_size);
+
+}
+
+/*
+ * exception_id_by_name:
+ *
+ *   Translate the class name of an intrinsic exception into its MONO_EXC_*
+ * id.  NAME is compared with strcmp () against each known name in turn; a
+ * name that matches none of them is reported through g_error ().
+ *
+ * Remove once throw_exception_by_name is eliminated.
+ */
+static int
+exception_id_by_name (const char *name)
+{
+       /* Known intrinsic exceptions, terminated by a NULL name */
+       static const struct {
+               const char *exc_name;
+               int exc_id;
+       } known_excs [] = {
+               { "IndexOutOfRangeException", MONO_EXC_INDEX_OUT_OF_RANGE },
+               { "OverflowException", MONO_EXC_OVERFLOW },
+               { "ArithmeticException", MONO_EXC_ARITHMETIC },
+               { "DivideByZeroException", MONO_EXC_DIVIDE_BY_ZERO },
+               { "InvalidCastException", MONO_EXC_INVALID_CAST },
+               { "NullReferenceException", MONO_EXC_NULL_REF },
+               { "ArrayTypeMismatchException", MONO_EXC_ARRAY_TYPE_MISMATCH },
+               { NULL, 0 }
+       };
+       int k;
+
+       for (k = 0; known_excs [k].exc_name; ++k) {
+               if (!strcmp (name, known_excs [k].exc_name))
+                       return known_excs [k].exc_id;
+       }
+
+       g_error ("Unknown intrinsic exception %s\n", name);
+       return 0;
+}
+
+void
+mono_arch_emit_exceptions (MonoCompile *cfg)
+{
+       MonoJumpInfo *patch_info;
+       int i;
+       guint8 *code;
+       const guint8* exc_throw_pos [MONO_EXC_INTRINS_NUM] = {NULL};
+       guint8 exc_throw_found [MONO_EXC_INTRINS_NUM] = {0};
+       int max_epilog_size = 50;
+
+       /* count the number of exception infos */
+     
+       /* 
+        * make sure we have enough space for exceptions
+        * 24 is the simulated call to throw_exception_by_name
+        */
+       for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
+               if (patch_info->type == MONO_PATCH_INFO_EXC) {
+                       i = exception_id_by_name (patch_info->data.target);
+                       if (!exc_throw_found [i]) {
+                               max_epilog_size += 24;
+                               exc_throw_found [i] = TRUE;
+                       }
+               } else if (patch_info->type == MONO_PATCH_INFO_BB_OVF)
+                       max_epilog_size += 12;
+               else if (patch_info->type == MONO_PATCH_INFO_EXC_OVF) {
+                       MonoOvfJump *ovfj = (MonoOvfJump*)patch_info->data.target;
+                       i = exception_id_by_name (ovfj->data.exception);
+                       if (!exc_throw_found [i]) {
+                               max_epilog_size += 24;
+                               exc_throw_found [i] = TRUE;
+                       }
+                       max_epilog_size += 8;
+               }
+       }
+
+       while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
+               cfg->code_size *= 2;
+               cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+               mono_jit_stats.code_reallocs++;
+       }
+
+       code = cfg->native_code + cfg->code_len;
+
        /* add code to raise exceptions */
        for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
                switch (patch_info->type) {
                case MONO_PATCH_INFO_BB_OVF: {
-                       MonoOvfJump *ovfj = patch_info->data.target;
+                       MonoOvfJump *ovfj = (MonoOvfJump*)patch_info->data.target;
                        unsigned char *ip = patch_info->ip.i + cfg->native_code;
                        /* patch the initial jump */
                        ppc_patch (ip, code);
@@ -3807,36 +4389,55 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                        ppc_patch (code - 4, ip + 4); /* jump back after the initiali branch */
                        /* jump back to the true target */
                        ppc_b (code, 0);
-                       ip = ovfj->bb->native_offset + cfg->native_code;
+                       ip = ovfj->data.bb->native_offset + cfg->native_code;
                        ppc_patch (code - 4, ip);
                        break;
                }
                case MONO_PATCH_INFO_EXC_OVF: {
-                       MonoOvfJump *ovfj = patch_info->data.target;
+                       MonoOvfJump *ovfj = (MonoOvfJump*)patch_info->data.target;
+                       MonoJumpInfo *newji;
                        unsigned char *ip = patch_info->ip.i + cfg->native_code;
-                       /* patch the initial jump */
+                       unsigned char *bcl = code;
+                       /* patch the initial jump: we arrived here with a call */
                        ppc_patch (ip, code);
-                       ppc_bc (code, ovfj->b0_cond, ovfj->b1_cond, 2);
+                       ppc_bc (code, ovfj->b0_cond, ovfj->b1_cond, 0);
                        ppc_b (code, 0);
                        ppc_patch (code - 4, ip + 4); /* jump back after the initiali branch */
-                       /* jump back to the true target */
-                       ppc_b (code, 0);
-                       ip = (char*)ovfj->ip + 4;
-                       ppc_patch (code - 4, ip);
+                       /* patch the conditional jump to the right handler */
+                       /* make it processed next */
+                       newji = mono_mempool_alloc (cfg->mempool, sizeof (MonoJumpInfo));
+                       newji->type = MONO_PATCH_INFO_EXC;
+                       newji->ip.i = bcl - cfg->native_code;
+                       newji->data.target = ovfj->data.exception;
+                       newji->next = patch_info->next;
+                       patch_info->next = newji;
                        break;
                }
                case MONO_PATCH_INFO_EXC: {
                        unsigned char *ip = patch_info->ip.i + cfg->native_code;
+                       i = exception_id_by_name (patch_info->data.target);
+                       if (exc_throw_pos [i]) {
+                               ppc_patch (ip, exc_throw_pos [i]);
+                               patch_info->type = MONO_PATCH_INFO_NONE;
+                               break;
+                       } else {
+                               exc_throw_pos [i] = code;
+                       }
                        ppc_patch (ip, code);
                        /*mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);*/
                        ppc_load (code, ppc_r3, patch_info->data.target);
-                       /* simulate a call from ip */
-                       ppc_load (code, ppc_r0, ip + 4);
-                       ppc_mtlr (code, ppc_r0);
+                       /* we got here from a conditional call, so the calling ip is set in lr already */
                        patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
                        patch_info->data.name = "mono_arch_throw_exception_by_name";
                        patch_info->ip.i = code - cfg->native_code;
-                       ppc_b (code, 0);
+                       if (FORCE_INDIR_CALL || cfg->method->dynamic) {
+                               ppc_lis (code, ppc_r0, 0);
+                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_mtctr (code, ppc_r0);
+                               ppc_bcctr (code, PPC_BR_ALWAYS, 0);
+                       } else {
+                               ppc_b (code, 0);
+                       }
                        break;
                }
                default:
@@ -3851,9 +4452,145 @@ mono_arch_emit_epilog (MonoCompile *cfg)
 
 }
 
+/*
+ * try_offset_access:
+ *
+ *   Check whether VALUE is what is stored under index IDX in the two-level
+ * pointer table located 284 bytes past the address held in register r2.
+ * IDX selects first-level entry IDX / 32 and, inside it, slot IDX % 32.
+ * Returns 1 on a match, 0 if the first-level entry is NULL or the slot
+ * holds a different pointer.
+ *
+ * NOTE(review): r2 + 284 is presumably the per-thread table that
+ * pthread_getspecific () reads on the targeted libc -- confirm.
+ */
+static int
+try_offset_access (void *value, guint32 idx)
+{
+       register void* thread_reg __asm__ ("r2");
+       void ***table = (void***)((char*)thread_reg + 284);
+       int row = idx / 32;
+       int col = idx % 32;
+
+       return table [row] && table [row][col] == value;
+}
+
+/*
+ * setup_tls_access:
+ *
+ *   Decide how thread-local values can be read (tls_mode) and resolve the
+ * keys for the domain, the LMF and the thread (monodomain_key,
+ * lmf_pthread_key, monothread_key).
+ *
+ * Detection works by inspecting the machine code of pthread_getspecific ()
+ * and matching it against a few known instruction patterns.  If nothing
+ * matches, or if the MONO_NO_TLS environment variable is set, tls_mode
+ * becomes TLS_MODE_FAILED and the keys are left untouched.
+ *
+ * Each key is only resolved while it still holds -1, and only values below
+ * 1024 are accepted.
+ */
+static void
+setup_tls_access (void)
+{
+       guint32 ptk;
+       guint32 *ins, *code;
+       guint32 cmplwi_1023, li_0x48, blr_ins;
+       /* a previous attempt already failed: don't retry */
+       if (tls_mode == TLS_MODE_FAILED)
+               return;
+
+       /* the user can disable the whole mechanism from the environment */
+       if (g_getenv ("MONO_NO_TLS")) {
+               tls_mode = TLS_MODE_FAILED;
+               return;
+       }
+
+       if (tls_mode == TLS_MODE_DETECT) {
+               ins = (guint32*)pthread_getspecific;
+               /* uncond branch to the real method */
+               if ((*ins >> 26) == 18) {
+                       gint32 val;
+                       /*
+                        * Drop the primary opcode (top 6 bits) and the two low
+                        * flag bits, then shift back to sign-extend the
+                        * displacement; this relies on >> of a negative
+                        * gint32 being an arithmetic shift.
+                        */
+                       val = (*ins & ~3) << 6;
+                       val >>= 6;
+                       if (*ins & 2) {
+                               /* absolute */
+                               ins = (guint32*)val;
+                       } else {
+                               ins = (guint32*) ((char*)ins + val);
+                       }
+               }
+               /*
+                * Assemble the reference instructions into local words with
+                * the regular emitter macros so they can be compared with the
+                * code of pthread_getspecific ().
+                */
+               code = &cmplwi_1023;
+               ppc_cmpli (code, 0, 0, ppc_r3, 1023);
+               code = &li_0x48;
+               ppc_li (code, ppc_r4, 0x48);
+               code = &blr_ins;
+               ppc_blr (code);
+               if (*ins == cmplwi_1023) {
+                       /*
+                        * Look at up to 20 following instructions, stopping at
+                        * a zero word or a blr, for one whose primary opcode
+                        * is 32 and whose low 16 bits are 284.
+                        */
+                       int found_lwz_284 = 0;
+                       for (ptk = 0; ptk < 20; ++ptk) {
+                               ++ins;
+                               if (!*ins || *ins == blr_ins)
+                                       break;
+                               if ((guint16)*ins == 284 && (*ins >> 26) == 32) {
+                                       found_lwz_284 = 1;
+                                       break;
+                               }
+                       }
+                       if (!found_lwz_284) {
+                               tls_mode = TLS_MODE_FAILED;
+                               return;
+                       }
+                       tls_mode = TLS_MODE_LTHREADS;
+               } else if (*ins == li_0x48) {
+                       ++ins;
+                       /* uncond branch to the real method */
+                       if ((*ins >> 26) == 18) {
+                               gint32 val;
+                               /* same displacement decoding as above */
+                               val = (*ins & ~3) << 6;
+                               val >>= 6;
+                               if (*ins & 2) {
+                                       /* absolute */
+                                       ins = (guint32*)val;
+                               } else {
+                                       ins = (guint32*) ((char*)ins + val);
+                               }
+                               /* val is reused as scratch space for the reference instruction */
+                               code = (guint32*)&val;
+                               ppc_li (code, ppc_r0, 0x7FF2);
+                               if (ins [1] == val) {
+                                       /* Darwin on G4, implement */
+                                       tls_mode = TLS_MODE_FAILED;
+                                       return;
+                               } else {
+                                       /* otherwise the second instruction must be mfspr r3, 104 */
+                                       code = (guint32*)&val;
+                                       ppc_mfspr (code, ppc_r3, 104);
+                                       if (ins [1] != val) {
+                                               tls_mode = TLS_MODE_FAILED;
+                                               return;
+                                       }
+                                       tls_mode = TLS_MODE_DARWIN_G5;
+                               }
+                       } else {
+                               tls_mode = TLS_MODE_FAILED;
+                               return;
+                       }
+               } else {
+                       tls_mode = TLS_MODE_FAILED;
+                       return;
+               }
+       }
+       /* resolve the keys that are still unset (-1); only values below 1024 are accepted */
+       if (monodomain_key == -1) {
+               ptk = mono_domain_get_tls_key ();
+               if (ptk < 1024) {
+                       ptk = mono_pthread_key_for_tls (ptk);
+                       if (ptk < 1024) {
+                               monodomain_key = ptk;
+                       }
+               }
+       }
+       if (lmf_pthread_key == -1) {
+               ptk = mono_pthread_key_for_tls (mono_jit_tls_id);
+               if (ptk < 1024) {
+                       /*g_print ("MonoLMF at: %d\n", ptk);*/
+                       /*if (!try_offset_access (mono_get_lmf_addr (), ptk)) {
+                               init_tls_failed = 1;
+                               return;
+                       }*/
+                       lmf_pthread_key = ptk;
+               }
+       }
+       if (monothread_key == -1) {
+               ptk = mono_thread_get_tls_key ();
+               if (ptk < 1024) {
+                       ptk = mono_pthread_key_for_tls (ptk);
+                       if (ptk < 1024) {
+                               monothread_key = ptk;
+                               /*g_print ("thread inited: %d\n", ptk);*/
+                       }
+               } else {
+                       /*g_print ("thread not inited yet %d\n", ptk);*/
+               }
+       }
+}
+
 void
 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
 {
+       /* TLS itself is not read here: this hook only runs the TLS access detection */
+       setup_tls_access ();
 }
 
 void
@@ -3872,34 +4609,153 @@ mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_re
        /* add the this argument */
        if (this_reg != -1) {
                MonoInst *this;
-               MONO_INST_NEW (cfg, this, OP_SETREG);
+               MONO_INST_NEW (cfg, this, OP_MOVE);
                this->type = this_type;
                this->sreg1 = this_reg;
-               this->dreg = this_dreg;
+               this->dreg = mono_regstate_next_int (cfg->rs);
                mono_bblock_add_inst (cfg->cbb, this);
+               mono_call_inst_add_outarg_reg (cfg, inst, this->dreg, this_dreg, FALSE);
        }
 
        if (vt_reg != -1) {
                MonoInst *vtarg;
-               MONO_INST_NEW (cfg, vtarg, OP_SETREG);
+               MONO_INST_NEW (cfg, vtarg, OP_MOVE);
                vtarg->type = STACK_MP;
                vtarg->sreg1 = vt_reg;
-               vtarg->dreg = ppc_r3;
+               vtarg->dreg = mono_regstate_next_int (cfg->rs);
                mono_bblock_add_inst (cfg->cbb, vtarg);
+               mono_call_inst_add_outarg_reg (cfg, inst, vtarg->dreg, ppc_r3, FALSE);
+       }
+}
+
+#ifdef MONO_ARCH_HAVE_IMT
+
+/*
+ * Code size budgets, in bytes, used to size the IMT thunk before emitting it.
+ */
+/* ppc_load of the method (budgeted 8 bytes, like the vtable load below) + ppc_cmpl */
+#define CMP_SIZE 12
+/* one conditional branch */
+#define BR_SIZE 4
+/* ppc_lwz of the vtable slot + ppc_mtctr + ppc_bcctr */
+#define JUMP_IMM_SIZE 12
+/* set to 1 to emit a check (ending in ppc_break) for a wrong method in the last entry */
+#define ENABLE_WRONG_METHOD_CHECK 0
+
+/*
+ * mono_arch_build_imt_thunk:
+ *
+ *   Emit the thunk that dispatches an interface call through the COUNT
+ * check items in IMT_ENTRIES and return its start address.  The code is
+ * reserved from DOMAIN's code manager.  The thunk loads the address of
+ * VTABLE's first slot into r11, compares MONO_ARCH_IMT_REG against the
+ * method of each item and jumps through the item's vtable slot.
+ *
+ * Three passes are made over the items: one to compute chunk sizes, one to
+ * emit the code and one to patch the branches between items.
+ *
+ * LOCKING: called with the domain lock held
+ */
+gpointer
+mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count)
+{
+       int i;
+       int size = 0;
+       guint8 *code, *start;
+
+       /* pass 1: compute the size of each chunk and of the whole thunk */
+       for (i = 0; i < count; ++i) {
+               MonoIMTCheckItem *item = imt_entries [i];
+               if (item->is_equals) {
+                       if (item->check_target_idx) {
+                               if (!item->compare_done)
+                                       item->chunk_size += CMP_SIZE;
+                               item->chunk_size += BR_SIZE + JUMP_IMM_SIZE;
+                       } else {
+                               item->chunk_size += JUMP_IMM_SIZE;
+#if ENABLE_WRONG_METHOD_CHECK
+                               item->chunk_size += CMP_SIZE + BR_SIZE + 4;
+#endif
+                       }
+               } else {
+                       item->chunk_size += CMP_SIZE + BR_SIZE;
+                       /* the branch target can skip its own compare: it was done here */
+                       imt_entries [item->check_target_idx]->compare_done = TRUE;
+               }
+               size += item->chunk_size;
+       }
+       /* the initial load of the vtable address */
+       size += 8;
+       code = mono_code_manager_reserve (domain->code_mp, size);
+       start = code;
+       ppc_load (code, ppc_r11, (guint32)(& (vtable->vtable [0])));
+       /* pass 2: emit the code of each item, remembering where it starts */
+       for (i = 0; i < count; ++i) {
+               MonoIMTCheckItem *item = imt_entries [i];
+               item->code_target = code;
+               if (item->is_equals) {
+                       if (item->check_target_idx) {
+                               if (!item->compare_done) {
+                                       ppc_load (code, ppc_r0, (guint32)item->method);
+                                       ppc_cmpl (code, 0, 0, MONO_ARCH_IMT_REG, ppc_r0);
+                               }
+                               /* on mismatch branch to the check_target_idx item (patched in pass 3) */
+                               item->jmp_code = code;
+                               ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
+                               /* match: jump through the vtable slot */
+                               ppc_lwz (code, ppc_r0, (sizeof (gpointer) * item->vtable_slot), ppc_r11);
+                               ppc_mtctr (code, ppc_r0);
+                               ppc_bcctr (code, PPC_BR_ALWAYS, 0);
+                       } else {
+                               /* enable the commented code to assert on wrong method */
+#if ENABLE_WRONG_METHOD_CHECK
+                               ppc_load (code, ppc_r0, (guint32)item->method);
+                               ppc_cmpl (code, 0, 0, MONO_ARCH_IMT_REG, ppc_r0);
+                               item->jmp_code = code;
+                               ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
+#endif
+                               /* no further item to check: jump through the vtable slot */
+                               ppc_lwz (code, ppc_r0, (sizeof (gpointer) * item->vtable_slot), ppc_r11);
+                               ppc_mtctr (code, ppc_r0);
+                               ppc_bcctr (code, PPC_BR_ALWAYS, 0);
+#if ENABLE_WRONG_METHOD_CHECK
+                               ppc_patch (item->jmp_code, code);
+                               ppc_break (code);
+                               item->jmp_code = NULL;
+#endif
+                       }
+               } else {
+                       /* compare only; the branch on the LT condition is patched in pass 3 */
+                       ppc_load (code, ppc_r0, (guint32)item->method);
+                       ppc_cmpl (code, 0, 0, MONO_ARCH_IMT_REG, ppc_r0);
+                       item->jmp_code = code;
+                       ppc_bc (code, PPC_BR_FALSE, PPC_BR_LT, 0);
+               }
+       }
+       /* patch the branches to get to the target items */
+       for (i = 0; i < count; ++i) {
+               MonoIMTCheckItem *item = imt_entries [i];
+               if (item->jmp_code) {
+                       if (item->check_target_idx) {
+                               ppc_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
+                       }
+               }
        }
+               
+       mono_stats.imt_thunks_size += code - start;
+       /* the emitted code must fit in the size computed by pass 1 */
+       g_assert (code - start <= size);
+       mono_arch_flush_icache (start, size);
+       return start;
+}
+
+/*
+ * mono_arch_find_imt_method:
+ *
+ *   Return the method stored in the MONO_ARCH_IMT_REG slot of the saved
+ * register array REGS.  CODE is not used.
+ */
+MonoMethod*
+mono_arch_find_imt_method (gpointer *regs, guint8 *code)
+{
+       return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
 }
 
-gint
-mono_arch_get_opcode_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
+/*
+ * mono_arch_find_this_argument:
+ *
+ *   Return the 'this' argument of a call to METHOD given the saved registers
+ * REGS.  Delegates to mono_arch_get_this_arg_from_call () with METHOD's
+ * signature and no code pointer.
+ */
+MonoObject*
+mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSharingContext *gsctx)
+{
+       return mono_arch_get_this_arg_from_call (gsctx, mono_method_signature (method), (gssize*)regs, NULL);
+}
+#endif
+
+/*
+ * mono_arch_get_inst_for_method:
+ *
+ *   Return an instruction implementing a call to CMETHOD inline, or NULL if
+ * there is none.  Currently always returns NULL: the only candidate
+ * (Math.Sqrt -> OP_SQRT) is commented out.
+ */
+MonoInst*
+mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
 {
-       /* optional instruction, need to detect it
-       if (cmethod->klass == mono_defaults.math_class) {
-               if (strcmp (cmethod->name, "Sqrt") == 0)
-                       return OP_SQRT;
+       MonoInst *ins = NULL;
+
+       /*if (cmethod->klass == mono_defaults.math_class) {
+               if (strcmp (cmethod->name, "Sqrt") == 0) {
+                       MONO_INST_NEW (cfg, ins, OP_SQRT);
+                       ins->inst_i0 = args [0];
+               }
        }*/
-       return -1;
+       return ins;
 }
 
+/*
+ * mono_arch_emit_inst_for_method:
+ *
+ *   Stub: no intrinsics are emitted here yet, so this always returns NULL
+ * and ignores all of its arguments.
+ */
+MonoInst*
+mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
+{
+       /* FIXME: */
+       return NULL;
+}
 
 gboolean
 mono_arch_print_tree (MonoInst *tree, int arity)
@@ -3909,10 +4765,34 @@ mono_arch_print_tree (MonoInst *tree, int arity)
 
 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
 {
-       return NULL;
+       /*
+        * Return an OP_TLS_GET instruction whose inst_offset is the domain
+        * key, or NULL if no key could be determined (monodomain_key is
+        * still -1 after running the TLS detection).
+        */
+       MonoInst* ins;
+
+       setup_tls_access ();
+       if (monodomain_key == -1)
+               return NULL;
+       
+       MONO_INST_NEW (cfg, ins, OP_TLS_GET);
+       ins->inst_offset = monodomain_key;
+       return ins;
 }
 
-MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
+/*
+ * mono_arch_get_thread_intrinsic:
+ *
+ *   Return an OP_TLS_GET instruction whose inst_offset is the thread key,
+ * or NULL if no key could be determined (monothread_key is still -1 after
+ * running the TLS detection).
+ */
+MonoInst* 
+mono_arch_get_thread_intrinsic (MonoCompile* cfg)
 {
-       return NULL;
+       MonoInst* ins;
+
+       setup_tls_access ();
+       if (monothread_key == -1)
+               return NULL;
+       
+       MONO_INST_NEW (cfg, ins, OP_TLS_GET);
+       ins->inst_offset = monothread_key;
+       return ins;
+}
+
+/*
+ * mono_arch_context_get_int_reg:
+ *
+ *   Not implemented yet: CTX and REG are ignored and reaching this function
+ * trips g_assert_not_reached ().  Returns NULL if that assertion is
+ * compiled out.
+ */
+gpointer
+mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
+{
+       /* FIXME: implement */
+       g_assert_not_reached ();
+       /*
+        * With G_DISABLE_ASSERT the line above expands to nothing; return a
+        * value so control never falls off the end of a non-void function.
+        */
+       return NULL;
 }