Mon Mar 8 17:58:26 CET 2010 Paolo Molaro <lupus@ximian.com>
[mono.git] / mono / mini / mini-ppc.c
old mode 100644 (file)
new mode 100755 (executable)
index d026ec1..a23e078
@@ -4,94 +4,34 @@
  * Authors:
  *   Paolo Molaro (lupus@ximian.com)
  *   Dietmar Maurer (dietmar@ximian.com)
+ *   Andreas Faerber <andreas.faerber@web.de>
  *
  * (C) 2003 Ximian, Inc.
+ * (C) 2007-2008 Andreas Faerber
  */
 #include "mini.h"
 #include <string.h>
 
 #include <mono/metadata/appdomain.h>
 #include <mono/metadata/debug-helpers.h>
+#include <mono/utils/mono-proclib.h>
+#include <mono/utils/mono-mmap.h>
 
 #include "mini-ppc.h"
+#ifdef TARGET_POWERPC64
+#include "cpu-ppc64.h"
+#else
 #include "cpu-ppc.h"
+#endif
 #include "trace.h"
+#include "ir-emit.h"
 #ifdef __APPLE__
 #include <sys/sysctl.h>
 #endif
-
-/* From ir-emit.h */
-static inline guint32
-alloc_ireg (MonoCompile *cfg)
-{
-       return cfg->next_vreg ++;
-}
-
-static inline guint32
-alloc_lreg (MonoCompile *cfg)
-{
-#if SIZEOF_VOID_P == 8
-       return cfg->next_vreg ++;
-#else
-       /* Use a pair of consecutive vregs */
-       guint32 res = cfg->next_vreg;
-
-       cfg->next_vreg += 3;
-
-       return res;
-#endif
-}
-
-static inline guint32
-alloc_freg (MonoCompile *cfg)
-{
-#ifdef MONO_ARCH_SOFT_FLOAT
-       /* Allocate an lvreg so float ops can be decomposed into long ops */
-       return alloc_lreg (cfg);
-#else
-       /* Allocate these from the same pool as the int regs */
-       return cfg->next_vreg ++;
-#endif
-}
-
-static inline guint32
-alloc_dreg (MonoCompile *cfg, MonoStackType stack_type)
-{
-       switch (stack_type) {
-       case STACK_I4:
-       case STACK_PTR:
-       case STACK_MP:
-       case STACK_OBJ:
-               return alloc_ireg (cfg);
-       case STACK_R8:
-               return alloc_freg (cfg);
-       case STACK_I8:
-               return alloc_lreg (cfg);
-       case STACK_VTYPE:
-               return alloc_ireg (cfg);
-       default:
-               g_assert_not_reached ();
-       }
-}
-
-#ifdef MONO_ARCH_SOFT_FLOAT
-#define DECOMPOSE_INTO_REGPAIR(stack_type) ((stack_type) == STACK_I8 || (stack_type) == STACK_R8)
-#else
-#define DECOMPOSE_INTO_REGPAIR(stack_type) ((stack_type) == STACK_I8)
+#ifdef __linux__
+#include <unistd.h>
 #endif
 
-#define NEW_VARLOADA(cfg,dest,var,vartype) do {        \
-        MONO_INST_NEW ((cfg), (dest), OP_LDADDR); \
-               (dest)->inst_p0 = (var); \
-               (var)->flags |= MONO_INST_INDIRECT;     \
-               (dest)->type = STACK_MP;        \
-               (dest)->klass = (var)->klass;   \
-        (dest)->dreg = alloc_dreg ((cfg), STACK_MP); \
-               if (SIZEOF_VOID_P == 4 && DECOMPOSE_INTO_REGPAIR ((var)->type)) { MonoInst *var1 = get_vreg_to_inst (cfg, (var)->dreg + 1); MonoInst *var2 = get_vreg_to_inst (cfg, (var)->dreg + 2); g_assert (var1); g_assert (var2); var1->flags |= MONO_INST_INDIRECT; var2->flags |= MONO_INST_INDIRECT; } \
-       } while (0)
-
-#define EMIT_NEW_VARLOADA(cfg,dest,var,vartype) do { NEW_VARLOADA ((cfg), (dest), (var), (vartype)); MONO_ADD_INS ((cfg)->cbb, (dest)); } while (0)
-
 #define FORCE_INDIR_CALL 1
 
 enum {
@@ -103,6 +43,22 @@ enum {
        TLS_MODE_DARWIN_G5
 };
 
+/* cpu_hw_caps contains the flags defined below */
+static int cpu_hw_caps = 0;
+static int cachelinesize = 0;
+static int cachelineinc = 0;
+enum {
+       PPC_ICACHE_SNOOP      = 1 << 0, /* set on Linux when AT_HWCAP has PPC_FEATURE_ICACHE_SNOOP */
+       PPC_MULTIPLE_LS_UNITS = 1 << 1, /* set on Linux when AT_PLATFORM is "ppc970" or "power" followed by a digit 4-7 */
+       PPC_SMP_CAPABLE       = 1 << 2, /* set when mono_cpu_count () > 1 */
+       PPC_ISA_2X            = 1 << 3, /* set on Linux when AT_HWCAP matches the ISA_2X mask */
+       PPC_ISA_64            = 1 << 4, /* set on Linux when AT_HWCAP has ISA_64 */
+       PPC_MOVE_FPR_GPR      = 1 << 5, /* set on Linux when AT_HWCAP has ISA_MOVE_FPR_GPR */
+       PPC_HW_CAP_END /* end marker, not a single-bit flag */
+};
+
+#define BREAKPOINT_SIZE (PPC_LOAD_SEQUENCE_LENGTH + 4)
+
 /* This mutex protects architecture specific caches */
 #define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
 #define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
@@ -111,9 +67,17 @@ static CRITICAL_SECTION mini_arch_mutex;
 int mono_exc_esp_offset = 0;
 static int tls_mode = TLS_MODE_DETECT;
 static int lmf_pthread_key = -1;
-static int monothread_key = -1;
 static int monodomain_key = -1;
 
+/*
+ * The code generated for sequence points reads from this location, which is
+ * made read-only when single stepping is enabled.
+ */
+static gpointer ss_trigger_page;
+
+/* Enabled breakpoints read from this trigger page */
+static gpointer bp_trigger_page;
+
 static int
 offsets_from_pthread_key (guint32 key, int *offset2)
 {
@@ -126,14 +90,14 @@ offsets_from_pthread_key (guint32 key, int *offset2)
 #define emit_linuxthreads_tls(code,dreg,key) do {\
                int off1, off2; \
                off1 = offsets_from_pthread_key ((key), &off2); \
-               ppc_lwz ((code), (dreg), off1, ppc_r2); \
-               ppc_lwz ((code), (dreg), off2, (dreg)); \
+               ppc_ldptr ((code), (dreg), off1, ppc_r2);       \
+               ppc_ldptr ((code), (dreg), off2, (dreg));       \
        } while (0);
 
 #define emit_darwing5_tls(code,dreg,key) do {\
                int off1 = 0x48 + key * sizeof (gpointer);      \
                ppc_mfspr ((code), (dreg), 104);        \
-               ppc_lwz ((code), (dreg), off1, (dreg)); \
+               ppc_ldptr ((code), (dreg), off1, (dreg));       \
        } while (0);
 
 /* FIXME: ensure the sc call preserves all but r3 */
@@ -146,15 +110,43 @@ offsets_from_pthread_key (guint32 key, int *offset2)
                if ((dreg) != ppc_r3) ppc_mr ((code), ppc_r3, ppc_r11); \
        } while (0);
 
+#ifdef PPC_THREAD_PTR_REG
+#define emit_nptl_tls(code,dreg,key) do { /* load the pointer-sized value at PPC_THREAD_PTR_REG + key into dreg */ \
+               int off1 = (key); \
+               int off2 = (key) >> 15; \
+               if ((off2 == 0) || (off2 == -1)) { /* key fits in a signed 16-bit displacement */ \
+                       ppc_ldptr ((code), (dreg), off1, PPC_THREAD_PTR_REG);   \
+               } else { /* large offset: build the base in ppc_r11, which is clobbered */ \
+                       int off3 = (off2 + 1) >> 1; /* high-adjusted half of key, i.e. (key + 0x8000) >> 16 */ \
+                       ppc_addis ((code), ppc_r11, PPC_THREAD_PTR_REG, off3); \
+                       ppc_ldptr ((code), (dreg), off1, ppc_r11);      \
+               } \
+       } while (0);
+#else
+#define emit_nptl_tls(code,dreg,key) do {      \
+               g_assert_not_reached ();        \
+       } while (0)
+#endif
+
 #define emit_tls_access(code,dreg,key) do {    \
                switch (tls_mode) {     \
                case TLS_MODE_LTHREADS: emit_linuxthreads_tls(code,dreg,key); break;    \
+               case TLS_MODE_NPTL: emit_nptl_tls(code,dreg,key); break;        \
                case TLS_MODE_DARWIN_G5: emit_darwing5_tls(code,dreg,key); break;       \
                case TLS_MODE_DARWIN_G4: emit_darwing4_tls(code,dreg,key); break;       \
                default: g_assert_not_reached ();       \
                }       \
        } while (0)
 
+#define MONO_EMIT_NEW_LOAD_R8(cfg,dr,addr) do { /* append an OP_R8CONST with dreg dr and inst_p0 addr to the current bblock */ \
+               MonoInst *inst;                                                    \
+               MONO_INST_NEW ((cfg), (inst), OP_R8CONST); \
+               inst->type = STACK_R8;                     \
+               inst->dreg = (dr);                     \
+               inst->inst_p0 = (void*)(addr);         \
+               mono_bblock_add_inst ((cfg)->cbb, inst); /* cfg parenthesized like every other macro argument */ \
+       } while (0)
+
 const char*
 mono_arch_regname (int reg) {
        static const char rnames[][4] = {
@@ -193,24 +185,59 @@ emit_memcpy (guint8 *code, int size, int dreg, int doffset, int sreg, int soffse
 {
        /* unrolled, use the counter in big */
        if (size > sizeof (gpointer) * 5) {
-               int shifted = size >> 2;
+               long shifted = size / SIZEOF_VOID_P;
                guint8 *copy_loop_start, *copy_loop_jump;
 
                ppc_load (code, ppc_r0, shifted);
                ppc_mtctr (code, ppc_r0);
                g_assert (sreg == ppc_r11);
-               ppc_addi (code, ppc_r12, dreg, (doffset - 4));
-               ppc_addi (code, ppc_r11, sreg, (soffset - 4));
+               ppc_addi (code, ppc_r12, dreg, (doffset - sizeof (gpointer)));
+               ppc_addi (code, ppc_r11, sreg, (soffset - sizeof (gpointer)));
                copy_loop_start = code;
-               ppc_lwzu (code, ppc_r0, ppc_r11, 4);
-               ppc_stwu (code, ppc_r0, 4, ppc_r12);
+               ppc_ldptr_update (code, ppc_r0, (unsigned int)sizeof (gpointer), ppc_r11);
+               ppc_stptr_update (code, ppc_r0, (unsigned int)sizeof (gpointer), ppc_r12);
                copy_loop_jump = code;
                ppc_bc (code, PPC_BR_DEC_CTR_NONZERO, 0, 0);
                ppc_patch (copy_loop_jump, copy_loop_start);
-               size -= shifted * 4;
+               size -= shifted * sizeof (gpointer);
                doffset = soffset = 0;
                dreg = ppc_r12;
        }
+#ifdef __mono_ppc64__
+       /* If the hardware has multiple load/store units and the move is long
+          enough to use more than one register, use load/load/store/store
+          to execute 2 instructions per cycle. */
+       if ((cpu_hw_caps & PPC_MULTIPLE_LS_UNITS) && (dreg != ppc_r12) && (sreg != ppc_r12)) { 
+               while (size >= 16) {
+                       ppc_ldptr (code, ppc_r0, soffset, sreg);
+                       ppc_ldptr (code, ppc_r12, soffset+8, sreg);
+                       ppc_stptr (code, ppc_r0, doffset, dreg);
+                       ppc_stptr (code, ppc_r12, doffset+8, dreg);
+                       size -= 16;
+                       soffset += 16;
+                       doffset += 16; 
+               }
+       }
+       while (size >= 8) {
+               ppc_ldr (code, ppc_r0, soffset, sreg);
+               ppc_str (code, ppc_r0, doffset, dreg);
+               size -= 8;
+               soffset += 8;
+               doffset += 8;
+       }
+#else
+       if ((cpu_hw_caps & PPC_MULTIPLE_LS_UNITS) && (dreg != ppc_r12) && (sreg != ppc_r12)) { 
+               while (size >= 8) {
+                       ppc_lwz (code, ppc_r0, soffset, sreg);
+                       ppc_lwz (code, ppc_r12, soffset+4, sreg);
+                       ppc_stw (code, ppc_r0, doffset, dreg);
+                       ppc_stw (code, ppc_r12, doffset+4, dreg);
+                       size -= 8;
+                       soffset += 8;
+                       doffset += 8; 
+               }
+       }
+#endif
        while (size >= 4) {
                ppc_lwz (code, ppc_r0, soffset, sreg);
                ppc_stw (code, ppc_r0, doffset, dreg);
@@ -249,6 +276,10 @@ emit_memcpy (guint8 *code, int size, int dreg, int doffset, int sreg, int soffse
 int
 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
 {
+#ifdef __mono_ppc64__
+       NOT_IMPLEMENTED;
+       return -1;
+#else
        int k, frame_size = 0;
        int size, align, pad;
        int offset = 8;
@@ -292,14 +323,63 @@ mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJit
        arg_info [k].pad = pad;
 
        return frame_size;
+#endif
+}
+
+#ifdef __mono_ppc64__
+static gboolean
+is_load_sequence (guint32 *seq)
+{
+       return ppc_opcode (seq [0]) == 15 && /* lis */
+               ppc_opcode (seq [1]) == 24 && /* ori */
+               ppc_opcode (seq [2]) == 30 && /* sldi */
+               ppc_opcode (seq [3]) == 25 && /* oris */
+               ppc_opcode (seq [4]) == 24; /* ori */
+}
+
+#define ppc_load_get_dest(l)   (((l)>>21) & 0x1f)
+#define ppc_load_get_off(l)    ((gint16)((l) & 0xffff))
+#endif
+
+/* code must point to the blrl */
+gboolean
+mono_ppc_is_direct_call_sequence (guint32 *code)
+{
+#ifdef __mono_ppc64__
+       g_assert(*code == 0x4e800021 || *code == 0x4e800020 || *code == 0x4e800420);
+
+       /* the thunk-less direct call sequence: lis/ori/sldi/oris/ori/mtlr/blrl */
+       if (ppc_opcode (code [-1]) == 31) { /* mtlr */
+               if (ppc_opcode (code [-2]) == 58 && ppc_opcode (code [-3]) == 58) { /* ld/ld */
+                       if (!is_load_sequence (&code [-8]))
+                               return FALSE;
+                       /* one of the loads must be "ld r2,8(rX)" */
+                       return (ppc_load_get_dest (code [-2]) == ppc_r2 && ppc_load_get_off (code [-2]) == 8) ||
+                               (ppc_load_get_dest (code [-3]) == ppc_r2 && ppc_load_get_off (code [-3]) == 8);
+               }
+               if (ppc_opcode (code [-2]) == 24 && ppc_opcode (code [-3]) == 31) /* mr/nop */
+                       return is_load_sequence (&code [-8]);
+               else
+                       return is_load_sequence (&code [-6]);
+       }
+       return FALSE;
+#else
+       g_assert(*code == 0x4e800021);
+
+       /* the thunk-less direct call sequence: lis/ori/mtlr/blrl */
+       return ppc_opcode (code [-1]) == 31 &&
+               ppc_opcode (code [-2]) == 24 &&
+               ppc_opcode (code [-3]) == 15;
+#endif
 }
 
 gpointer
-mono_arch_get_vcall_slot (guint8 *code_ptr, gpointer *regs, int *displacement)
+mono_arch_get_vcall_slot (guint8 *code_ptr, mgreg_t *regs, int *displacement)
 {
        char *o = NULL;
        int reg, offset = 0;
        guint32* code = (guint32*)code_ptr;
+       mgreg_t *r = (mgreg_t*)regs;
 
        *displacement = 0;
 
@@ -310,11 +390,10 @@ mono_arch_get_vcall_slot (guint8 *code_ptr, gpointer *regs, int *displacement)
        if (*code != 0x4e800021)
                return NULL;
 
-       /* the thunk-less direct call sequence: lis/ori/mtlr/blrl */
-       if ((code [-1] >> 26) == 31 && (code [-2] >> 26) == 24 && (code [-3] >> 26) == 15) {
+       if (mono_ppc_is_direct_call_sequence (code))
                return NULL;
-       }
 
+       /* FIXME: more sanity checks here */
        /* OK, we're now at the 'blrl' instruction. Now walk backwards
        till we get to a 'mtlr rA' */
        for (; --code;) {
@@ -333,13 +412,7 @@ mono_arch_get_vcall_slot (guint8 *code_ptr, gpointer *regs, int *displacement)
                        reg = (*code >> 16) & 0x1f;
                        g_assert (reg != ppc_r1);
                        /*g_print ("patching reg is %d\n", reg);*/
-                       if (reg >= 13) {
-                               MonoLMF *lmf = (MonoLMF*)((char*)regs + (14 * sizeof (double)) + (13 * sizeof (gulong)));
-                               /* saved in the MonoLMF structure */
-                               o = (gpointer)lmf->iregs [reg - 13];
-                       } else {
-                               o = regs [reg];
-                       }
+                       o = (gpointer)(gsize)r [reg];
                        break;
                }
        }
@@ -347,18 +420,83 @@ mono_arch_get_vcall_slot (guint8 *code_ptr, gpointer *regs, int *displacement)
        return o;
 }
 
-gpointer*
-mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
+#define MAX_ARCH_DELEGATE_PARAMS 7
+
+static gpointer
+get_delegate_invoke_impl (gboolean has_target, guint32 param_count, guint32 *code_len, gboolean aot)
 {
-       gpointer vt;
-       int displacement;
-       vt = mono_arch_get_vcall_slot (code, regs, &displacement);
-       if (!vt)
-               return NULL;
-       return (gpointer*)((char*)vt + displacement);
+       guint8 *code, *start;
+
+       if (has_target) {
+               int size = MONO_PPC_32_64_CASE (32, 32) + PPC_FTNPTR_SIZE;
+
+               start = code = mono_global_codeman_reserve (size);
+               if (!aot)
+                       code = mono_ppc_create_pre_code_ftnptr (code);
+
+               /* Replace the this argument with the target */
+               ppc_ldptr (code, ppc_r0, G_STRUCT_OFFSET (MonoDelegate, method_ptr), ppc_r3);
+#ifdef PPC_USES_FUNCTION_DESCRIPTOR
+               /* it's a function descriptor */
+               /* Can't use ldptr as it doesn't work with r0 */
+               ppc_ldptr_indexed (code, ppc_r0, 0, ppc_r0);
+#endif
+               ppc_mtctr (code, ppc_r0);
+               ppc_ldptr (code, ppc_r3, G_STRUCT_OFFSET (MonoDelegate, target), ppc_r3);
+               ppc_bcctr (code, PPC_BR_ALWAYS, 0);
+
+               g_assert ((code - start) <= size);
+
+               mono_arch_flush_icache (start, size);
+       } else {
+               int size, i;
+
+               size = MONO_PPC_32_64_CASE (32, 32) + param_count * 4 + PPC_FTNPTR_SIZE;
+               start = code = mono_global_codeman_reserve (size);
+               if (!aot)
+                       code = mono_ppc_create_pre_code_ftnptr (code);
+
+               ppc_ldptr (code, ppc_r0, G_STRUCT_OFFSET (MonoDelegate, method_ptr), ppc_r3);
+#ifdef PPC_USES_FUNCTION_DESCRIPTOR
+               /* it's a function descriptor */
+               ppc_ldptr_indexed (code, ppc_r0, 0, ppc_r0);
+#endif
+               ppc_mtctr (code, ppc_r0);
+               /* slide down the arguments */
+               for (i = 0; i < param_count; ++i) {
+                       ppc_mr (code, (ppc_r3 + i), (ppc_r3 + i + 1));
+               }
+               ppc_bcctr (code, PPC_BR_ALWAYS, 0);
+
+               g_assert ((code - start) <= size);
+
+               mono_arch_flush_icache (start, size);
+       }
+
+       if (code_len)
+               *code_len = code - start;
+
+       return start;
 }
 
-#define MAX_ARCH_DELEGATE_PARAMS 7
+GSList*
+mono_arch_get_delegate_invoke_impls (void)
+{
+       GSList *res = NULL;
+       guint8 *code;
+       guint32 code_len;
+       int i;
+
+       code = get_delegate_invoke_impl (TRUE, 0, &code_len, TRUE);
+       res = g_slist_prepend (res, mono_aot_tramp_info_create (g_strdup ("delegate_invoke_impl_has_target"), code, code_len));
+
+       for (i = 0; i < MAX_ARCH_DELEGATE_PARAMS; ++i) {
+               code = get_delegate_invoke_impl (FALSE, i, &code_len, TRUE);
+               res = g_slist_prepend (res, mono_aot_tramp_info_create (g_strdup_printf ("delegate_invoke_impl_target_%d", i), code, code_len));
+       }
+
+       return res;
+}
 
 gpointer
 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
@@ -371,29 +509,21 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
 
        if (has_target) {
                static guint8* cached = NULL;
-               mono_mini_arch_lock ();
-               if (cached) {
-                       mono_mini_arch_unlock ();
+
+               if (cached)
                        return cached;
-               }
-               
-               start = code = mono_global_codeman_reserve (16);
 
-               /* Replace the this argument with the target */
-               ppc_lwz (code, ppc_r0, G_STRUCT_OFFSET (MonoDelegate, method_ptr), ppc_r3);
-               ppc_mtctr (code, ppc_r0);
-               ppc_lwz (code, ppc_r3, G_STRUCT_OFFSET (MonoDelegate, target), ppc_r3);
-               ppc_bcctr (code, PPC_BR_ALWAYS, 0);
+               if (mono_aot_only)
+                       start = mono_aot_get_named_code ("delegate_invoke_impl_has_target");
+               else
+                       start = get_delegate_invoke_impl (TRUE, 0, NULL, FALSE);
 
-               g_assert ((code - start) <= 16);
+               mono_memory_barrier ();
 
-               mono_arch_flush_icache (start, 16);
                cached = start;
-               mono_mini_arch_unlock ();
-               return cached;
        } else {
                static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
-               int size, i;
+               int i;
 
                if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
                        return NULL;
@@ -401,49 +531,151 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
                        if (!mono_is_regsize_var (sig->params [i]))
                                return NULL;
 
-               mono_mini_arch_lock ();
+
                code = cache [sig->param_count];
-               if (code) {
-                       mono_mini_arch_unlock ();
+               if (code)
                        return code;
-               }
-
-               size = 12 + sig->param_count * 4;
-               start = code = mono_global_codeman_reserve (size);
 
-               ppc_lwz (code, ppc_r0, G_STRUCT_OFFSET (MonoDelegate, method_ptr), ppc_r3);
-               ppc_mtctr (code, ppc_r0);
-               /* slide down the arguments */
-               for (i = 0; i < sig->param_count; ++i) {
-                       ppc_mr (code, (ppc_r3 + i), (ppc_r3 + i + 1));
+               if (mono_aot_only) {
+                       char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", sig->param_count);
+                       start = mono_aot_get_named_code (name);
+                       g_free (name);
+               } else {
+                       start = get_delegate_invoke_impl (FALSE, sig->param_count, NULL, FALSE);
                }
-               ppc_bcctr (code, PPC_BR_ALWAYS, 0);
 
-               g_assert ((code - start) <= size);
+               mono_memory_barrier ();
 
-               mono_arch_flush_icache (start, size);
                cache [sig->param_count] = start;
-               mono_mini_arch_unlock ();
-               return start;
        }
-       return NULL;
+       return start;
 }
 
 gpointer
-mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, gssize *regs, guint8 *code)
+mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, mgreg_t *regs, guint8 *code)
 {
+       mgreg_t *r = (mgreg_t*)regs;
+
        /* FIXME: handle returning a struct */
        if (MONO_TYPE_ISSTRUCT (sig->ret))
-               return (gpointer)regs [ppc_r4];
-       return (gpointer)regs [ppc_r3];
+               return (gpointer)(gsize)r [ppc_r4];
+       return (gpointer)(gsize)r [ppc_r3];
+}
+
+typedef struct { /* one (type, value) record of the ELF auxiliary vector, as read from /proc/self/auxv */
+       long int type; /* AT_* tag; this file checks 0 (AT_NULL), 15 (AT_PLATFORM), 16 (AT_HWCAP), 19 (AT_DCACHEBSIZE) */
+       long int value; /* payload for the tag; for AT_PLATFORM it is cast to a char* */
+} AuxVec;
+
+#ifdef USE_ENVIRON_HACK
+static AuxVec*
+linux_find_auxv (int *count) /* returns the vector located just past __environ, or NULL with *count = 0 */
+{
+       AuxVec *vec;
+       int c = 0; /* number of entries seen before AT_NULL */
+       char **result = __environ;
+       /* Scan over the env vector looking for the ending NULL */
+       for (; *result != NULL; ++result) {
+       }
+       /* Bump the pointer one more step, which should be the auxv. */
+       ++result;
+       vec = (AuxVec *)result;
+       if (vec->type != 22 /*AT_IGNOREPPC*/) { /* sanity check: give up unless the first entry has the expected tag */
+               *count = 0;
+               return NULL;
+       }
+       while (vec->type != 0 /*AT_NULL*/) {
+               vec++;
+               c++;
+       }
+       *count = c; /* the AT_NULL terminator itself is not counted */
+       return (AuxVec *)result;
+}
+#endif
+
+#define MAX_AUX_ENTRIES 128
+/* 
+ * PPC_FEATURE_POWER4, PPC_FEATURE_POWER5, PPC_FEATURE_POWER5_PLUS, PPC_FEATURE_CELL,
+ * PPC_FEATURE_PA6T and PPC_FEATURE_ARCH_2_05 are all treated as supporting the ISA 2.x features
+ */
+#define ISA_2X (0x00080000 | 0x00040000 | 0x00020000 | 0x00010000 | 0x00000800 | 0x00001000)
+
+/* define PPC_FEATURE_64 HWCAP for 64-bit category.  */
+#define ISA_64 0x40000000
 
+/* define PPC_FEATURE_POWER6_EXT HWCAP for power6x mffgpr/mftgpr instructions.  */
+#define ISA_MOVE_FPR_GPR 0x00000200
 /*
  * Initialize the cpu to execute managed code.
  */
 void
 mono_arch_cpu_init (void)
 {
+#ifdef __APPLE__
+       int mib [3];
+       size_t len;
+       mib [0] = CTL_HW;
+       mib [1] = HW_CACHELINE;
+       len = sizeof (cachelinesize);
+       if (sysctl (mib, 2, &cachelinesize, (size_t*)&len, NULL, 0) == -1) {
+               perror ("sysctl");
+               cachelinesize = 128;
+       } else {
+               cachelineinc = cachelinesize;
+       }
+#elif defined(__linux__)
+       AuxVec vec [MAX_AUX_ENTRIES];
+       int i, vec_entries = 0;
+       /* sadly this will work only with 2.6 kernels... */
+       FILE* f = fopen ("/proc/self/auxv", "rb");
+       if (f) {
+               vec_entries = fread (&vec, sizeof (AuxVec), MAX_AUX_ENTRIES, f);
+               fclose (f);
+#ifdef USE_ENVIRON_HACK
+       } else {
+               AuxVec *evec = linux_find_auxv (&vec_entries);
+               if (vec_entries)
+                       memcpy (&vec, evec, sizeof (AuxVec) * MIN (vec_entries, MAX_AUX_ENTRIES));
+#endif
+       }
+       for (i = 0; i < vec_entries; i++) {
+               int type = vec [i].type;
+               if (type == 19) { /* AT_DCACHEBSIZE */
+                       cachelinesize = vec [i].value;
+                       continue;
+               } else if (type == 16) { /* AT_HWCAP */
+                       if (vec [i].value & 0x00002000 /*PPC_FEATURE_ICACHE_SNOOP*/)
+                               cpu_hw_caps |= PPC_ICACHE_SNOOP;
+                       if (vec [i].value & ISA_2X)
+                               cpu_hw_caps |= PPC_ISA_2X;
+                       if (vec [i].value & ISA_64)
+                               cpu_hw_caps |= PPC_ISA_64;
+                       if (vec [i].value & ISA_MOVE_FPR_GPR)
+                               cpu_hw_caps |= PPC_MOVE_FPR_GPR;
+                       continue;
+               } else if (type == 15) { /* AT_PLATFORM */
+                       const char *arch = (char*)vec [i].value;
+                       if (strcmp (arch, "ppc970") == 0 ||
+                                       (strncmp (arch, "power", 5) == 0 && arch [5] >= '4' && arch [5] <= '7'))
+                               cpu_hw_caps |= PPC_MULTIPLE_LS_UNITS;
+                       /*printf ("cpu: %s\n", (char*)vec [i].value);*/
+                       continue;
+               }
+       }
+#elif defined(G_COMPILER_CODEWARRIOR)
+       cachelinesize = 32;
+       cachelineinc = 32;
+#elif defined(MONO_CROSS_COMPILE)
+#else
+//#error Need a way to get cache line size
+#endif
+       if (!cachelinesize)
+               cachelinesize = 32;
+       if (!cachelineinc)
+               cachelineinc = cachelinesize;
+
+       if (mono_cpu_count () > 1)
+               cpu_hw_caps |= PPC_SMP_CAPABLE;
 }
 
 /*
@@ -452,7 +684,11 @@ mono_arch_cpu_init (void)
 void
 mono_arch_init (void)
 {
-       InitializeCriticalSection (&mini_arch_mutex);   
+       InitializeCriticalSection (&mini_arch_mutex);
+
+       ss_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
+       bp_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
+       mono_mprotect (bp_trigger_page, mono_pagesize (), 0);
 }
 
 /*
@@ -477,6 +713,14 @@ mono_arch_cpu_optimizazions (guint32 *exclude_mask)
        return opts;
 }
 
+#ifdef __mono_ppc64__
+#define CASE_PPC32(c)
+#define CASE_PPC64(c)  case c:
+#else
+#define CASE_PPC32(c)  case c:
+#define CASE_PPC64(c)
+#endif
+
 static gboolean
 is_regsize_var (MonoType *t) {
        if (t->byref)
@@ -485,6 +729,8 @@ is_regsize_var (MonoType *t) {
        switch (t->type) {
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
+       CASE_PPC64 (MONO_TYPE_I8)
+       CASE_PPC64 (MONO_TYPE_U8)
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_PTR:
@@ -542,8 +788,14 @@ mono_arch_get_global_int_regs (MonoCompile *cfg)
        if (cfg->frame_reg != ppc_sp)
                top = 31;
        /* ppc_r13 is used by the system on PPC EABI */
-       for (i = 14; i < top; ++i)
-               regs = g_list_prepend (regs, GUINT_TO_POINTER (i));
+       for (i = 14; i < top; ++i) {
+               /*
+                * Reserve r29 for holding the vtable address for virtual calls in AOT mode,
+                * since the trampolines can clobber r11.
+                */
+               if (!(cfg->compile_aot && i == 29))
+                       regs = g_list_prepend (regs, GUINT_TO_POINTER (i));
+       }
 
        return regs;
 }
@@ -562,62 +814,20 @@ mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
        return 2;
 }
 
-typedef struct {
-       long int type;
-       long int value;
-} AuxVec;
-
 void
 mono_arch_flush_icache (guint8 *code, gint size)
 {
+#ifdef MONO_CROSS_COMPILE
+#else
        register guint8 *p;
        guint8 *endp, *start;
-       static int cachelinesize = 0;
-       static int cachelineinc = 16;
 
-       if (!cachelinesize) {
-#ifdef __APPLE__
-               int mib [3];
-               size_t len;
-               mib [0] = CTL_HW;
-               mib [1] = HW_CACHELINE;
-               len = sizeof (cachelinesize);
-               if (sysctl(mib, 2, &cachelinesize, (size_t*)&len, NULL, 0) == -1) {
-                       perror ("sysctl");
-                       cachelinesize = 128;
-               } else {
-                       cachelineinc = cachelinesize;
-                       /*g_print ("setting cl size to %d\n", cachelinesize);*/
-               }
-#elif defined(__linux__)
-               /* sadly this will work only with 2.6 kernels... */
-               FILE* f = fopen ("/proc/self/auxv", "rb");
-               if (f) {
-                       AuxVec vec;
-                       while (fread (&vec, sizeof (vec), 1, f) == 1) {
-                               if (vec.type == 19) {
-                                       cachelinesize = vec.value;
-                                       break;
-                               }
-                       }
-                       fclose (f);
-               }
-               if (!cachelinesize)
-                       cachelinesize = 128;
-#elif defined(G_COMPILER_CODEWARRIOR)
-       cachelinesize = 32;
-       cachelineinc = 32;
-#else
-#warning Need a way to get cache line size
-               cachelinesize = 128;
-#endif
-       }
        p = start = code;
        endp = p + size;
        start = (guint8*)((gsize)start & ~(cachelinesize - 1));
        /* use dcbf for smp support, later optimize for UP, see pem._64bit.d20030611.pdf page 211 */
 #if defined(G_COMPILER_CODEWARRIOR)
-       if (1) {
+       if (cpu_hw_caps & PPC_SMP_CAPABLE) {
                for (p = start; p < endp; p += cachelineinc) {
                        asm { dcbf 0, p };
                }
@@ -639,7 +849,19 @@ mono_arch_flush_icache (guint8 *code, gint size)
                isync
        }
 #else
-       if (1) {
+       /* For POWER5/6 with ICACHE_SNOOPing only one icbi in the range is required.
+        * The sync is required to ensure that the store queue is completely empty.
+        * While the icbi performs no cache operations, icbi/isync is required to
+        * kill local prefetch.
+        */
+       if (cpu_hw_caps & PPC_ICACHE_SNOOP) {
+               asm ("sync");
+               asm ("icbi 0,%0;" : : "r"(code) : "memory");
+               asm ("isync");
+               return;
+       }
+       /* use dcbf for smp support, see pem._64bit.d20030611.pdf page 211 */
+       if (cpu_hw_caps & PPC_SMP_CAPABLE) {
                for (p = start; p < endp; p += cachelineinc) {
                        asm ("dcbf 0,%0;" : : "r"(p) : "memory");
                }
@@ -651,11 +873,21 @@ mono_arch_flush_icache (guint8 *code, gint size)
        asm ("sync");
        p = code;
        for (p = start; p < endp; p += cachelineinc) {
-               asm ("icbi 0,%0; sync;" : : "r"(p) : "memory");
+               /* for ISA2.0+ implementations we should not need any extra sync between the
+                * icbi instructions.  Both the 2.0 PEM and the PowerISA-2.05 say this.
+                * So I am not sure which chip had this problem, but it is not an issue on
+                * any of the ISA V2 chips.
+                */
+               if (cpu_hw_caps & PPC_ISA_2X)
+                       asm ("icbi 0,%0;" : : "r"(p) : "memory");
+               else
+                       asm ("icbi 0,%0; sync;" : : "r"(p) : "memory");
        }
-       asm ("sync");
+       if (!(cpu_hw_caps & PPC_ISA_2X))
+               asm ("sync");
        asm ("isync");
 #endif
+#endif
 }
 
 void
@@ -667,8 +899,13 @@ mono_arch_flush_register_windows (void)
 #define ALWAYS_ON_STACK(s) s
 #define FP_ALSO_IN_REG(s) s
 #else
+#ifdef __mono_ppc64__
+#define ALWAYS_ON_STACK(s) s
+#define FP_ALSO_IN_REG(s) s
+#else
 #define ALWAYS_ON_STACK(s)
 #define FP_ALSO_IN_REG(s)
+#endif
 #define ALIGN_DOUBLES
 #endif
 
@@ -684,8 +921,12 @@ typedef struct {
        gint32  offset;
        guint32 vtsize; /* in param area */
        guint8  reg;
+       guint8  vtregs; /* number of registers used to pass a RegTypeStructByVal */
        guint8  regtype : 4; /* 0 general, 1 basereg, 2 floating point register, see RegType* */
        guint8  size    : 4; /* 1, 2, 4, 8, or regs used by RegTypeStructByVal */
+       guint8  bytes   : 4; /* size in bytes - only valid for
+                               RegTypeStructByVal if the struct fits
+                               in one word, otherwise it's 0*/
 } ArgInfo;
 
 typedef struct {
@@ -702,14 +943,18 @@ typedef struct {
 static void inline
 add_general (guint *gr, guint *stack_size, ArgInfo *ainfo, gboolean simple)
 {
+#ifdef __mono_ppc64__
+       g_assert (simple);
+#endif
+
        if (simple) {
                if (*gr >= 3 + PPC_NUM_REG_ARGS) {
                        ainfo->offset = PPC_STACK_PARAM_OFFSET + *stack_size;
                        ainfo->reg = ppc_sp; /* in the caller */
                        ainfo->regtype = RegTypeBase;
-                       *stack_size += 4;
+                       *stack_size += sizeof (gpointer);
                } else {
-                       ALWAYS_ON_STACK (*stack_size += 4);
+                       ALWAYS_ON_STACK (*stack_size += sizeof (gpointer));
                        ainfo->reg = *gr;
                }
        } else {
@@ -734,7 +979,7 @@ add_general (guint *gr, guint *stack_size, ArgInfo *ainfo, gboolean simple)
        (*gr) ++;
 }
 
-#if __APPLE__
+#if defined(__APPLE__) || defined(__mono_ppc64__)
 static gboolean
 has_only_a_r48_field (MonoClass *klass)
 {
@@ -761,7 +1006,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
 {
        guint i, fr, gr;
        int n = sig->hasthis + sig->param_count;
-       guint32 simpletype;
+       MonoType *simpletype;
        guint32 stack_size = 0;
        CallInfo *cinfo = g_malloc0 (sizeof (CallInfo) + sizeof (ArgInfo) * n);
 
@@ -796,8 +1041,8 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        n++;
                        continue;
                }
-               simpletype = mini_type_get_underlying_type (NULL, sig->params [i])->type;
-               switch (simpletype) {
+               simpletype = mini_type_get_underlying_type (NULL, sig->params [i]);
+               switch (simpletype->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
@@ -832,22 +1077,27 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        n++;
                        break;
                case MONO_TYPE_GENERICINST:
-                       if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
+                       if (!mono_type_generic_inst_is_valuetype (simpletype)) {
                                cinfo->args [n].size = sizeof (gpointer);
                                add_general (&gr, &stack_size, cinfo->args + n, TRUE);
                                n++;
                                break;
                        }
                        /* Fall through */
-               case MONO_TYPE_VALUETYPE: {
+               case MONO_TYPE_VALUETYPE:
+               case MONO_TYPE_TYPEDBYREF: {
                        gint size;
                        MonoClass *klass;
+
                        klass = mono_class_from_mono_type (sig->params [i]);
-                       if (is_pinvoke)
+                       if (simpletype->type == MONO_TYPE_TYPEDBYREF)
+                               size = sizeof (MonoTypedRef);
+                       else if (is_pinvoke)
                            size = mono_class_native_size (klass, NULL);
                        else
                            size = mono_class_value_size (klass, NULL);
-#if __APPLE__
+
+#if defined(__APPLE__) || defined(__mono_ppc64__)
                        if ((size == 4 || size == 8) && has_only_a_r48_field (klass)) {
                                cinfo->args [n].size = size;
 
@@ -872,59 +1122,41 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
 #endif
                        DEBUG(printf ("load %d bytes struct\n",
                                      mono_class_native_size (sig->params [i]->data.klass, NULL)));
+
 #if PPC_PASS_STRUCTS_BY_VALUE
                        {
                                int align_size = size;
-                               int nwords = 0;
+                               int nregs = 0;
                                int rest = PPC_LAST_ARG_REG - gr + 1;
                                int n_in_regs;
+
                                align_size += (sizeof (gpointer) - 1);
                                align_size &= ~(sizeof (gpointer) - 1);
-                               nwords = (align_size + sizeof (gpointer) -1 ) / sizeof (gpointer);
-                               n_in_regs = rest >= nwords? nwords: rest;
-                               cinfo->args [n].regtype = RegTypeStructByVal;
-                               if (gr > PPC_LAST_ARG_REG || (size >= 3 && size % 4 != 0)) {
-                                       cinfo->args [n].size = 0;
-                                       cinfo->args [n].vtsize = nwords;
-                               } else {
-                                       cinfo->args [n].size = n_in_regs;
-                                       cinfo->args [n].vtsize = nwords - n_in_regs;
-                                       cinfo->args [n].reg = gr;
-                               }
-                               gr += n_in_regs;
-                               cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
-                               /*g_print ("offset for arg %d at %d\n", n, PPC_STACK_PARAM_OFFSET + stack_size);*/
-                               stack_size += nwords * sizeof (gpointer);
-                       }
-#else
-                       add_general (&gr, &stack_size, cinfo->args + n, TRUE);
-                       cinfo->args [n].regtype = RegTypeStructByAddr;
-                       cinfo->args [n].vtsize = size;
+                               nregs = (align_size + sizeof (gpointer) -1 ) / sizeof (gpointer);
+                               n_in_regs = MIN (rest, nregs);
+                               if (n_in_regs < 0)
+                                       n_in_regs = 0;
+#ifdef __APPLE__
+                               /* FIXME: check this */
+                               if (size >= 3 && size % 4 != 0)
+                                       n_in_regs = 0;
 #endif
-                       n++;
-                       break;
-               }
-               case MONO_TYPE_TYPEDBYREF: {
-                       int size = sizeof (MonoTypedRef);
-                       /* keep in sync or merge with the valuetype case */
-#if PPC_PASS_STRUCTS_BY_VALUE
-                       {
-                               int nwords = (size + sizeof (gpointer) -1 ) / sizeof (gpointer);
                                cinfo->args [n].regtype = RegTypeStructByVal;
-                               if (gr <= PPC_LAST_ARG_REG) {
-                                       int rest = PPC_LAST_ARG_REG - gr + 1;
-                                       int n_in_regs = rest >= nwords? nwords: rest;
-                                       cinfo->args [n].size = n_in_regs;
-                                       cinfo->args [n].vtsize = nwords - n_in_regs;
-                                       cinfo->args [n].reg = gr;
-                                       gr += n_in_regs;
-                               } else {
-                                       cinfo->args [n].size = 0;
-                                       cinfo->args [n].vtsize = nwords;
-                               }
+                               cinfo->args [n].vtregs = n_in_regs;
+                               cinfo->args [n].size = n_in_regs;
+                               cinfo->args [n].vtsize = nregs - n_in_regs;
+                               cinfo->args [n].reg = gr;
+
+#ifdef __mono_ppc64__
+                               if (nregs == 1 && is_pinvoke)
+                                       cinfo->args [n].bytes = size;
+                               else
+#endif
+                                       cinfo->args [n].bytes = 0;
+                               gr += n_in_regs;
                                cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
                                /*g_print ("offset for arg %d at %d\n", n, PPC_STACK_PARAM_OFFSET + stack_size);*/
-                               stack_size += nwords * sizeof (gpointer);
+                               stack_size += nregs * sizeof (gpointer);
                        }
 #else
                        add_general (&gr, &stack_size, cinfo->args + n, TRUE);
@@ -937,7 +1169,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        cinfo->args [n].size = 8;
-                       add_general (&gr, &stack_size, cinfo->args + n, FALSE);
+                       add_general (&gr, &stack_size, cinfo->args + n, SIZEOF_REGISTER == 8);
                        n++;
                        break;
                case MONO_TYPE_R4:
@@ -949,12 +1181,12 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                                cinfo->args [n].reg = fr;
                                fr ++;
                                FP_ALSO_IN_REG (gr ++);
-                               ALWAYS_ON_STACK (stack_size += 4);
+                               ALWAYS_ON_STACK (stack_size += SIZEOF_REGISTER);
                        } else {
-                               cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
+                               cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size + MONO_PPC_32_64_CASE (0, 4);
                                cinfo->args [n].regtype = RegTypeBase;
                                cinfo->args [n].reg = ppc_sp; /* in the caller*/
-                               stack_size += 4;
+                               stack_size += SIZEOF_REGISTER;
                        }
                        n++;
                        break;
@@ -965,7 +1197,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                                cinfo->args [n].regtype = RegTypeFP;
                                cinfo->args [n].reg = fr;
                                fr ++;
-                               FP_ALSO_IN_REG (gr += 2);
+                               FP_ALSO_IN_REG (gr += sizeof (double) / SIZEOF_REGISTER);
                                ALWAYS_ON_STACK (stack_size += 8);
                        } else {
                                cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
@@ -989,8 +1221,8 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
        }
 
        {
-               simpletype = mini_type_get_underlying_type (NULL, sig->ret)->type;
-               switch (simpletype) {
+               simpletype = mini_type_get_underlying_type (NULL, sig->ret);
+               switch (simpletype->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
@@ -1020,7 +1252,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        cinfo->ret.regtype = RegTypeFP;
                        break;
                case MONO_TYPE_GENERICINST:
-                       if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
+                       if (!mono_type_generic_inst_is_valuetype (simpletype)) {
                                cinfo->ret.reg = ppc_r3;
                                break;
                        }
@@ -1106,7 +1338,7 @@ mono_arch_allocate_vars (MonoCompile *m)
        if (m->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE)
                m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
 
-       header = mono_method_get_header (m->method);
+       header = m->header;
 
        /* 
         * We use the frame register also for any method that has
@@ -1180,9 +1412,6 @@ mono_arch_allocate_vars (MonoCompile *m)
        /* this is a global constant */
        mono_exc_esp_offset = offset;
 #endif
-       if (sig->call_convention == MONO_CALL_VARARG) {
-                m->sig_cookie = PPC_STACK_PARAM_OFFSET;
-        }
 
        if (MONO_TYPE_ISSTRUCT (sig->ret)) {
                offset += sizeof(gpointer) - 1;
@@ -1198,8 +1427,6 @@ mono_arch_allocate_vars (MonoCompile *m)
                }
 
                offset += sizeof(gpointer);
-               if (sig->call_convention == MONO_CALL_VARARG)
-                       m->sig_cookie += sizeof (gpointer);
        }
 
        offsets = mono_allocate_stack_slots_full (m, FALSE, &locals_stack_size, &locals_stack_align);
@@ -1231,8 +1458,6 @@ mono_arch_allocate_vars (MonoCompile *m)
                        offset &= ~(sizeof (gpointer) - 1);
                        inst->inst_offset = offset;
                        offset += sizeof (gpointer);
-                       if (sig->call_convention == MONO_CALL_VARARG)
-                               m->sig_cookie += sizeof (gpointer);
                }
                curinst++;
        }
@@ -1248,12 +1473,12 @@ mono_arch_allocate_vars (MonoCompile *m)
                        } else {
                                size = mono_type_size (sig->params [i], &align);
                        }
+                       if (MONO_TYPE_ISSTRUCT (sig->params [i]) && size < sizeof (gpointer))
+                               size = align = sizeof (gpointer);
                        offset += align - 1;
                        offset &= ~(align - 1);
                        inst->inst_offset = offset;
                        offset += size;
-                       if ((sig->call_convention == MONO_CALL_VARARG) && (i < sig->sentinelpos)) 
-                               m->sig_cookie += size;
                }
                curinst++;
        }
@@ -1299,7 +1524,10 @@ emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
 {
        int sig_reg = mono_alloc_ireg (cfg);
 
-       MONO_EMIT_NEW_ICONST (cfg, sig_reg, (guint32)call->signature);
+       /* FIXME: Add support for signature tokens to AOT */
+       cfg->disable_aot = TRUE;
+
+       MONO_EMIT_NEW_ICONST (cfg, sig_reg, (gulong)call->signature);
        MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG,
                        ppc_r1, cinfo->sig_cookie.offset, sig_reg);
 }
@@ -1333,6 +1561,7 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
                in = call->args [i];
 
                if (ainfo->regtype == RegTypeGeneral) {
+#ifndef __mono_ppc64__
                        if (!t->byref && ((t->type == MONO_TYPE_I8) || (t->type == MONO_TYPE_U8))) {
                                MONO_INST_NEW (cfg, ins, OP_MOVE);
                                ins->dreg = mono_alloc_ireg (cfg);
@@ -1345,7 +1574,9 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
                                ins->sreg1 = in->dreg + 2;
                                MONO_ADD_INS (cfg->cbb, ins);
                                mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg, FALSE);
-                       } else {
+                       } else
+#endif
+                       {
                                MONO_INST_NEW (cfg, ins, OP_MOVE);
                                ins->dreg = mono_alloc_ireg (cfg);
                                ins->sreg1 = in->dreg;
@@ -1433,7 +1664,7 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
        }
 
        call->stack_usage = cinfo->stack_usage;
-       cfg->param_area = MAX (cfg->param_area, cinfo->stack_usage);
+       cfg->param_area = MAX (PPC_MINIMAL_PARAM_AREA_SIZE, MAX (cfg->param_area, cinfo->stack_usage));
        cfg->flags |= MONO_CFG_HAS_CALLS;
 
        g_free (cinfo);
@@ -1449,7 +1680,9 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
        int i, soffset, dreg;
 
        if (ainfo->regtype == RegTypeStructByVal) {
+#ifdef __APPLE__
                guint32 size = 0;
+#endif
                soffset = 0;
 #ifdef __APPLE__
                /*
@@ -1470,9 +1703,16 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
                        mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg, FALSE);
                } else
 #endif
-                       for (i = 0; i < ainfo->size; ++i) {
+                       for (i = 0; i < ainfo->vtregs; ++i) {
+                               int antipadding = 0;
+                               if (ainfo->bytes) {
+                                       g_assert (i == 0);
+                                       antipadding = sizeof (gpointer) - ainfo->bytes;
+                               }
                                dreg = mono_alloc_ireg (cfg);
                                MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, soffset);
+                               if (antipadding)
+                                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHR_UN_IMM, dreg, dreg, antipadding * 8);
                                mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg + i, FALSE);
                                soffset += sizeof (gpointer);
                        }
@@ -1519,6 +1759,7 @@ mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
                        mono_method_signature (method)->ret);
 
        if (!ret->byref) {
+#ifndef __mono_ppc64__
                if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) {
                        MonoInst *ins;
 
@@ -1528,6 +1769,7 @@ mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
                        MONO_ADD_INS (cfg->cbb, ins);
                        return;
                }
+#endif
                if (ret->type == MONO_TYPE_R8 || ret->type == MONO_TYPE_R4) {
                        MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
                        return;
@@ -1552,9 +1794,9 @@ mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean ena
 {
        guchar *code = p;
 
-       ppc_load (code, ppc_r3, cfg->method);
+       ppc_load_ptr (code, ppc_r3, cfg->method);
        ppc_li (code, ppc_r4, 0); /* NULL ebp for now */
-       ppc_load (code, ppc_r0, func);
+       ppc_load_func (code, ppc_r0, func);
        ppc_mtlr (code, ppc_r0);
        ppc_blrl (code);
        return code;
@@ -1569,7 +1811,7 @@ enum {
 };
 
 void*
-mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
+mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers)
 {
        guchar *code = p;
        int save_mode = SAVE_NONE;
@@ -1597,10 +1839,12 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena
                else
                        save_mode = SAVE_NONE;
                break;
+#ifndef __mono_ppc64__
        case MONO_TYPE_I8:
        case MONO_TYPE_U8:
                save_mode = SAVE_TWO;
                break;
+#endif
        case MONO_TYPE_R4:
        case MONO_TYPE_R8:
                save_mode = SAVE_FP;
@@ -1623,7 +1867,7 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena
                }
                break;
        case SAVE_ONE:
-               ppc_stw (code, ppc_r3, save_offset, cfg->frame_reg);
+               ppc_stptr (code, ppc_r3, save_offset, cfg->frame_reg);
                if (enable_arguments) {
                        ppc_mr (code, ppc_r4, ppc_r3);
                }
@@ -1633,6 +1877,7 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena
                if (enable_arguments) {
                        /* FIXME: what reg?  */
                        ppc_fmr (code, ppc_f3, ppc_f1);
+                       /* FIXME: use 8 byte load on PPC64 */
                        ppc_lwz (code, ppc_r4, save_offset, cfg->frame_reg);
                        ppc_lwz (code, ppc_r5, save_offset + 4, cfg->frame_reg);
                }
@@ -1648,8 +1893,8 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena
                break;
        }
 
-       ppc_load (code, ppc_r3, cfg->method);
-       ppc_load (code, ppc_r0, func);
+       ppc_load_ptr (code, ppc_r3, cfg->method);
+       ppc_load_func (code, ppc_r0, func);
        ppc_mtlr (code, ppc_r0);
        ppc_blrl (code);
 
@@ -1659,7 +1904,7 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena
                ppc_lwz (code, ppc_r4, save_offset + 4, cfg->frame_reg);
                break;
        case SAVE_ONE:
-               ppc_lwz (code, ppc_r3, save_offset, cfg->frame_reg);
+               ppc_ldptr (code, ppc_r3, save_offset, cfg->frame_reg);
                break;
        case SAVE_FP:
                ppc_lfd (code, ppc_f1, save_offset, cfg->frame_reg);
@@ -1690,31 +1935,22 @@ typedef struct {
 } MonoOvfJump;
 
 #define EMIT_COND_BRANCH_FLAGS(ins,b0,b1) \
-if (ins->flags & MONO_INST_BRLABEL) { \
-        if (0 && ins->inst_i0->inst_c0) { \
-               ppc_bc (code, (b0), (b1), (code - cfg->native_code + ins->inst_i0->inst_c0) & 0xffff);  \
-        } else { \
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
-               ppc_bc (code, (b0), (b1), 0);   \
-        } \
+if (0 && ins->inst_true_bb->native_offset) { \
+       ppc_bc (code, (b0), (b1), (code - cfg->native_code + ins->inst_true_bb->native_offset) & 0xffff); \
 } else { \
-        if (0 && ins->inst_true_bb->native_offset) { \
-               ppc_bc (code, (b0), (b1), (code - cfg->native_code + ins->inst_true_bb->native_offset) & 0xffff); \
-        } else { \
-               int br_disp = ins->inst_true_bb->max_offset - offset;   \
-               if (!ppc_is_imm16 (br_disp + 1024) || ! ppc_is_imm16 (ppc_is_imm16 (br_disp - 1024))) { \
-                       MonoOvfJump *ovfj = mono_mempool_alloc (cfg->mempool, sizeof (MonoOvfJump));    \
-                       ovfj->data.bb = ins->inst_true_bb;      \
-                       ovfj->ip_offset = 0;    \
-                       ovfj->b0_cond = (b0);   \
-                       ovfj->b1_cond = (b1);   \
-                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB_OVF, ovfj); \
-                       ppc_b (code, 0);        \
-               } else {        \
-                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
-                       ppc_bc (code, (b0), (b1), 0);   \
-               }       \
-        } \
+       int br_disp = ins->inst_true_bb->max_offset - offset;   \
+       if (!ppc_is_imm16 (br_disp + 1024) || ! ppc_is_imm16 (ppc_is_imm16 (br_disp - 1024))) { \
+               MonoOvfJump *ovfj = mono_mempool_alloc (cfg->mempool, sizeof (MonoOvfJump));    \
+               ovfj->data.bb = ins->inst_true_bb;      \
+               ovfj->ip_offset = 0;    \
+               ovfj->b0_cond = (b0);   \
+               ovfj->b1_cond = (b1);   \
+               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB_OVF, ovfj); \
+               ppc_b (code, 0);        \
+       } else {        \
+               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
+               ppc_bc (code, (b0), (b1), 0);   \
+       }       \
 }
 
 #define EMIT_COND_BRANCH(ins,cond) EMIT_COND_BRANCH_FLAGS(ins, branch_b0_table [(cond)], branch_b1_table [(cond)])
@@ -1750,13 +1986,38 @@ mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
 {
 }
 
+static int
+normalize_opcode (int opcode)
+{
+       switch (opcode) {       /* fold width-specific opcodes into their generic forms so a caller can switch on one value */
+#ifndef __mono_ilp32__ /* the load/store aliases below are compiled out on ilp32 builds */
+       case MONO_PPC_32_64_CASE (OP_LOADI4_MEMBASE, OP_LOADI8_MEMBASE):        /* presumably first arg on 32-bit, second on 64-bit builds -- confirm in mini-ppc.h */
+               return OP_LOAD_MEMBASE;
+       case MONO_PPC_32_64_CASE (OP_LOADI4_MEMINDEX, OP_LOADI8_MEMINDEX):
+               return OP_LOAD_MEMINDEX;
+       case MONO_PPC_32_64_CASE (OP_STOREI4_MEMBASE_REG, OP_STOREI8_MEMBASE_REG):
+               return OP_STORE_MEMBASE_REG;
+       case MONO_PPC_32_64_CASE (OP_STOREI4_MEMBASE_IMM, OP_STOREI8_MEMBASE_IMM):
+               return OP_STORE_MEMBASE_IMM;
+       case MONO_PPC_32_64_CASE (OP_STOREI4_MEMINDEX, OP_STOREI8_MEMINDEX):
+               return OP_STORE_MEMINDEX;
+#endif
+       case MONO_PPC_32_64_CASE (OP_ISHR_IMM, OP_LSHR_IMM):    /* shift aliases are mapped regardless of __mono_ilp32__ */
+               return OP_SHR_IMM;
+       case MONO_PPC_32_64_CASE (OP_ISHR_UN_IMM, OP_LSHR_UN_IMM):
+               return OP_SHR_UN_IMM;
+       default:
+               return opcode;  /* not one of the aliases above: returned unchanged */
+       }
+}
+
 void
 mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
 {
        MonoInst *ins, *n, *last_ins = NULL;
 
        MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
-               switch (ins->opcode) {
+               switch (normalize_opcode (ins->opcode)) {
                case OP_MUL_IMM: 
                        /* remove unnecessary multiplication with 1 */
                        if (ins->inst_imm == 1) {
@@ -1775,13 +2036,11 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                case OP_LOAD_MEMBASE:
-               case OP_LOADI4_MEMBASE:
                        /* 
                         * OP_STORE_MEMBASE_REG reg, offset(basereg) 
                         * OP_LOAD_MEMBASE offset(basereg), reg
                         */
-                       if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
-                                        || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
+                       if (last_ins && normalize_opcode (last_ins->opcode) == OP_STORE_MEMBASE_REG &&
                            ins->inst_basereg == last_ins->inst_destbasereg &&
                            ins->inst_offset == last_ins->inst_offset) {
                                if (ins->dreg == last_ins->sreg1) {
@@ -1801,8 +2060,7 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
                         * OP_LOAD_MEMBASE offset(basereg), reg1
                         * OP_MOVE reg1, reg2
                         */
-                       } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
-                                          || last_ins->opcode == OP_LOAD_MEMBASE) &&
+                       } else if (last_ins && normalize_opcode (last_ins->opcode) == OP_LOAD_MEMBASE &&
                              ins->inst_basereg != last_ins->dreg &&
                              ins->inst_basereg == last_ins->inst_basereg &&
                              ins->inst_offset == last_ins->inst_offset) {
@@ -1825,8 +2083,7 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
                         * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
                         * OP_ICONST reg, imm
                         */
-                       } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
-                                               || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
+                       } else if (last_ins && normalize_opcode (last_ins->opcode) == OP_STORE_MEMBASE_IMM &&
                                   ins->inst_basereg == last_ins->inst_destbasereg &&
                                   ins->inst_offset == last_ins->inst_offset) {
                                //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
@@ -1854,6 +2111,17 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
                                ins->sreg1 = last_ins->sreg1;                           
                        }
                        break;
+#ifdef __mono_ppc64__
+               case OP_LOADU4_MEMBASE:
+               case OP_LOADI4_MEMBASE:
+                       if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
+                                       ins->inst_basereg == last_ins->inst_destbasereg &&
+                                       ins->inst_offset == last_ins->inst_offset) {
+                               ins->opcode = (ins->opcode == OP_LOADI4_MEMBASE) ? OP_ICONV_TO_I4 : OP_ICONV_TO_U4;
+                               ins->sreg1 = last_ins->sreg1;
+                       }
+                       break;
+#endif
                case OP_MOVE:
                        ins->opcode = OP_MOVE;
                        /* 
@@ -1905,32 +2173,40 @@ mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
                ins->opcode = OP_NOP;
                break;
        }
+#ifndef __mono_ppc64__
        case OP_ICONV_TO_R4:
        case OP_ICONV_TO_R8: {
-               /* FIXME: change precision for CEE_CONV_R4 */
-               static const guint64 adjust_val = 0x4330000080000000ULL;
-               int msw_reg = mono_alloc_ireg (cfg);
-               int xored = mono_alloc_ireg (cfg);
-               int adj_reg = mono_alloc_freg (cfg);
-               int tmp_reg = mono_alloc_freg (cfg);
-               int basereg = ppc_sp;
-               int offset = -8;
-               if (!ppc_is_imm16 (offset + 4)) {
-                       basereg = mono_alloc_ireg (cfg);
-                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IADD_IMM, basereg, cfg->frame_reg, offset);
+               /* If we have a PPC_FEATURE_64 machine we can avoid
+                  this and use the fcfid instruction.  Otherwise
+                  we are on an old 32-bit chip and have to do this the
+                  hard way.  */
+               if (!(cpu_hw_caps & PPC_ISA_64)) {
+                       /* FIXME: change precision for CEE_CONV_R4 */
+                       static const guint64 adjust_val = 0x4330000080000000ULL;
+                       int msw_reg = mono_alloc_ireg (cfg);
+                       int xored = mono_alloc_ireg (cfg);
+                       int adj_reg = mono_alloc_freg (cfg);
+                       int tmp_reg = mono_alloc_freg (cfg);
+                       int basereg = ppc_sp;
+                       int offset = -8;
+                       if (!ppc_is_imm16 (offset + 4)) {
+                               basereg = mono_alloc_ireg (cfg);
+                               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IADD_IMM, basereg, cfg->frame_reg, offset);
+                       }
+                       MONO_EMIT_NEW_ICONST (cfg, msw_reg, 0x43300000);
+                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset, msw_reg);
+                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_XOR_IMM, xored, ins->sreg1, 0x80000000);
+                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset + 4, xored);
+                       MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, (gpointer)&adjust_val);
+                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, basereg, offset);
+                       MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg);
+                       if (ins->opcode == OP_ICONV_TO_R4)
+                               MONO_EMIT_NEW_UNALU (cfg, OP_FCONV_TO_R4, ins->dreg, ins->dreg);
+                       ins->opcode = OP_NOP;
                }
-               MONO_EMIT_NEW_ICONST (cfg, msw_reg, 0x43300000);
-               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset, msw_reg);
-               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_XOR_IMM, xored, ins->sreg1, 0x80000000);
-               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset + 4, xored);
-               MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, (gpointer)&adjust_val);
-               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, basereg, offset);
-               MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg);
-               if (ins->opcode == OP_ICONV_TO_R4)
-                       MONO_EMIT_NEW_UNALU (cfg, OP_FCONV_TO_R4, ins->dreg, ins->dreg);
-               ins->opcode = OP_NOP;
                break;
        }
+#endif
        case OP_CKFINITE: {
                int msw_reg = mono_alloc_ireg (cfg);
                int basereg = ppc_sp;
@@ -1946,6 +2222,66 @@ mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
                ins->opcode = OP_NOP;
                break;
        }
+#ifdef __mono_ppc64__
+       case OP_IADD_OVF:
+       case OP_IADD_OVF_UN:
+       case OP_ISUB_OVF: {
+               int shifted1_reg = mono_alloc_ireg (cfg);
+               int shifted2_reg = mono_alloc_ireg (cfg);
+               int result_shifted_reg = mono_alloc_ireg (cfg);
+
+               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHL_IMM, shifted1_reg, ins->sreg1, 32);
+               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHL_IMM, shifted2_reg, ins->sreg2, 32);
+               MONO_EMIT_NEW_BIALU (cfg, ins->opcode, result_shifted_reg, shifted1_reg, shifted2_reg);
+               if (ins->opcode == OP_IADD_OVF_UN)
+                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHR_UN_IMM, ins->dreg, result_shifted_reg, 32);
+               else
+                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHR_IMM, ins->dreg, result_shifted_reg, 32);
+               ins->opcode = OP_NOP;
+       }
+#endif
+       }
+}
+/*
+ * mono_arch_decompose_long_opts:
+ *
+ *   Replace the long opcodes listed in the switch below with two ops on the
+ * vregs numbered reg + 1 and reg + 2, then NULLIFY_INS the original.  All
+ * other opcodes hit the default case and are left alone.
+ *   The carry-producing op is always emitted on reg + 1 before the
+ * carry-consuming op on reg + 2, so reg + 1 presumably holds the low word
+ * and reg + 2 the high word -- TODO confirm against the vreg allocator.
+ */
+void
+mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *ins)
+{
+       switch (ins->opcode) {
+       case OP_LADD_OVF:
+               /* ADDCC on reg + 1 goes first; ADD_OVF_CARRY on reg + 2 follows it */
+               MONO_EMIT_NEW_BIALU (cfg, OP_ADDCC, ins->dreg + 1, ins->sreg1 + 1, ins->sreg2 + 1);
+               MONO_EMIT_NEW_BIALU (cfg, OP_ADD_OVF_CARRY, ins->dreg + 2, ins->sreg1 + 2, ins->sreg2 + 2);
+               NULLIFY_INS (ins);
+               break;
+       case OP_LADD_OVF_UN:
+               /* Same shape as OP_LADD_OVF, with ADD_OVF_UN_CARRY on reg + 2 */
+               MONO_EMIT_NEW_BIALU (cfg, OP_ADDCC, ins->dreg + 1, ins->sreg1 + 1, ins->sreg2 + 1);
+               MONO_EMIT_NEW_BIALU (cfg, OP_ADD_OVF_UN_CARRY, ins->dreg + 2, ins->sreg1 + 2, ins->sreg2 + 2);
+               NULLIFY_INS (ins);
+               break;
+       case OP_LSUB_OVF:
+               /* SUBCC on reg + 1 goes first; SUB_OVF_CARRY on reg + 2 follows it */
+               MONO_EMIT_NEW_BIALU (cfg, OP_SUBCC, ins->dreg + 1, ins->sreg1 + 1, ins->sreg2 + 1);
+               MONO_EMIT_NEW_BIALU (cfg, OP_SUB_OVF_CARRY, ins->dreg + 2, ins->sreg1 + 2, ins->sreg2 + 2);
+               NULLIFY_INS (ins);
+               break;
+       case OP_LSUB_OVF_UN:
+               /* Same shape as OP_LSUB_OVF, with SUB_OVF_UN_CARRY on reg + 2 */
+               MONO_EMIT_NEW_BIALU (cfg, OP_SUBCC, ins->dreg + 1, ins->sreg1 + 1, ins->sreg2 + 1);
+               MONO_EMIT_NEW_BIALU (cfg, OP_SUB_OVF_UN_CARRY, ins->dreg + 2, ins->sreg1 + 2, ins->sreg2 + 2);
+               NULLIFY_INS (ins);
+               break;
+       case OP_LNEG:
+               /* This is the old version from inssel-long32.brg */
+               MONO_EMIT_NEW_UNALU (cfg, OP_INOT, ins->dreg + 1, ins->sreg1 + 1);
+               MONO_EMIT_NEW_UNALU (cfg, OP_INOT, ins->dreg + 2, ins->sreg1 + 2);
+               /* ADC sets the condition codes.
+                * Both words were complemented above; now add 1 to reg + 1 and
+                * 0 to reg + 2, each with OP_ADC_IMM.
+                * NOTE(review): the first add is also OP_ADC_IMM rather than
+                * OP_ADDCC_IMM -- confirm it does not depend on a carry left
+                * behind by earlier code. */
+               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ADC_IMM, ins->dreg + 1, ins->dreg + 1, 1);
+               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ADC_IMM, ins->dreg + 2, ins->dreg + 2, 0);
+               NULLIFY_INS (ins);
+               break;
+       default:
+               break;
        }
 }
 
@@ -2012,6 +2348,8 @@ map_to_reg_reg_op (int op)
                return OP_COMPARE;
        case OP_ICOMPARE_IMM:
                return OP_ICOMPARE;
+       case OP_LCOMPARE_IMM:
+               return OP_LCOMPARE;
        case OP_ADDCC_IMM:
                return OP_IADDCC;
        case OP_ADC_IMM:
@@ -2032,6 +2370,8 @@ map_to_reg_reg_op (int op)
                return OP_LOADI4_MEMINDEX;
        case OP_LOADU4_MEMBASE:
                return OP_LOADU4_MEMINDEX;
+       case OP_LOADI8_MEMBASE:
+               return OP_LOADI8_MEMINDEX;
        case OP_LOADU1_MEMBASE:
                return OP_LOADU1_MEMINDEX;
        case OP_LOADI2_MEMBASE:
@@ -2050,6 +2390,8 @@ map_to_reg_reg_op (int op)
                return OP_STOREI2_MEMINDEX;
        case OP_STOREI4_MEMBASE_REG:
                return OP_STOREI4_MEMINDEX;
+       case OP_STOREI8_MEMBASE_REG:
+               return OP_STOREI8_MEMINDEX;
        case OP_STORE_MEMBASE_REG:
                return OP_STORE_MEMINDEX;
        case OP_STORER4_MEMBASE_REG:
@@ -2064,6 +2406,8 @@ map_to_reg_reg_op (int op)
                return OP_STOREI2_MEMBASE_REG;
        case OP_STOREI4_MEMBASE_IMM:
                return OP_STOREI4_MEMBASE_REG;
+       case OP_STOREI8_MEMBASE_IMM:
+               return OP_STOREI8_MEMBASE_REG;
        }
        return mono_op_imm_to_op (op);
 }
@@ -2072,10 +2416,14 @@ map_to_reg_reg_op (int op)
 
 #define compare_opcode_is_unsigned(opcode) \
                (((opcode) >= CEE_BNE_UN && (opcode) <= CEE_BLT_UN) ||  \
-               (((opcode) >= OP_IBNE_UN && (opcode) <= OP_IBLT_UN) ||  \
+               ((opcode) >= OP_IBNE_UN && (opcode) <= OP_IBLT_UN) ||   \
+               ((opcode) >= OP_LBNE_UN && (opcode) <= OP_LBLT_UN) ||   \
                ((opcode) >= OP_COND_EXC_NE_UN && (opcode) <= OP_COND_EXC_LT_UN) ||     \
                ((opcode) >= OP_COND_EXC_INE_UN && (opcode) <= OP_COND_EXC_ILT_UN) ||   \
-               ((opcode) == OP_CLT_UN || (opcode) == OP_CGT_UN || (opcode) == OP_ICLT_UN || (opcode) == OP_ICGT_UN)))
+               ((opcode) == OP_CLT_UN || (opcode) == OP_CGT_UN ||      \
+                (opcode) == OP_ICLT_UN || (opcode) == OP_ICGT_UN ||    \
+                (opcode) == OP_LCLT_UN || (opcode) == OP_LCGT_UN))
+
 /*
  * Remove from the instruction list the instructions that can't be
  * represented with very simple instructions with no register
@@ -2087,10 +2435,6 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
        MonoInst *ins, *next, *temp, *last_ins = NULL;
        int imm;
 
-       /* setup the virtual reg allocator */
-       if (bb->max_vreg > cfg->rs->next_vreg)
-               cfg->rs->next_vreg = bb->max_vreg;
-
        MONO_BB_FOR_EACH_INS (bb, ins) {
 loop_start:
                switch (ins->opcode) {
@@ -2100,7 +2444,7 @@ loop_start:
                case OP_IREM_UN_IMM:
                        NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_imm;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = temp->dreg;
                        if (ins->opcode == OP_IDIV_IMM)
                                ins->opcode = OP_IDIV;
@@ -2114,42 +2458,52 @@ loop_start:
                        /* handle rem separately */
                        goto loop_start;
                case OP_IREM:
-               case OP_IREM_UN: {
+               case OP_IREM_UN:
+               CASE_PPC64 (OP_LREM)
+               CASE_PPC64 (OP_LREM_UN) {
                        MonoInst *mul;
                        /* we change a rem dest, src1, src2 to
                         * div temp1, src1, src2
                         * mul temp2, temp1, src2
                         * sub dest, src1, temp2
                         */
-                       NEW_INS (cfg, mul, OP_IMUL);
-                       NEW_INS (cfg, temp, ins->opcode == OP_IREM? OP_IDIV: OP_IDIV_UN);
+                       if (ins->opcode == OP_IREM || ins->opcode == OP_IREM_UN) {
+                               NEW_INS (cfg, mul, OP_IMUL);
+                               NEW_INS (cfg, temp, ins->opcode == OP_IREM? OP_IDIV: OP_IDIV_UN);
+                               ins->opcode = OP_ISUB;
+                       } else {
+                               NEW_INS (cfg, mul, OP_LMUL);
+                               NEW_INS (cfg, temp, ins->opcode == OP_LREM? OP_LDIV: OP_LDIV_UN);
+                               ins->opcode = OP_LSUB;
+                       }
                        temp->sreg1 = ins->sreg1;
                        temp->sreg2 = ins->sreg2;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->dreg = mono_alloc_ireg (cfg);
                        mul->sreg1 = temp->dreg;
                        mul->sreg2 = ins->sreg2;
-                       mul->dreg = mono_regstate_next_int (cfg->rs);
-                       ins->opcode = OP_ISUB;
+                       mul->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = mul->dreg;
                        break;
                }
                case OP_IADD_IMM:
+               CASE_PPC64 (OP_LADD_IMM)
                case OP_ADD_IMM:
                case OP_ADDCC_IMM:
                        if (!ppc_is_imm16 (ins->inst_imm)) {
                                NEW_INS (cfg,  temp, OP_ICONST);
                                temp->inst_c0 = ins->inst_imm;
-                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                               temp->dreg = mono_alloc_ireg (cfg);
                                ins->sreg2 = temp->dreg;
                                ins->opcode = map_to_reg_reg_op (ins->opcode);
                        }
                        break;
                case OP_ISUB_IMM:
+               CASE_PPC64 (OP_LSUB_IMM)
                case OP_SUB_IMM:
                        if (!ppc_is_imm16 (-ins->inst_imm)) {
                                NEW_INS (cfg, temp, OP_ICONST);
                                temp->inst_c0 = ins->inst_imm;
-                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                               temp->dreg = mono_alloc_ireg (cfg);
                                ins->sreg2 = temp->dreg;
                                ins->opcode = map_to_reg_reg_op (ins->opcode);
                        }
@@ -2157,17 +2511,26 @@ loop_start:
                case OP_IAND_IMM:
                case OP_IOR_IMM:
                case OP_IXOR_IMM:
+               case OP_LAND_IMM:
+               case OP_LOR_IMM:
+               case OP_LXOR_IMM:
                case OP_AND_IMM:
                case OP_OR_IMM:
-               case OP_XOR_IMM:
-                       if ((ins->inst_imm & 0xffff0000) && (ins->inst_imm & 0xffff)) {
+               case OP_XOR_IMM: {
+                       gboolean is_imm = ((ins->inst_imm & 0xffff0000) && (ins->inst_imm & 0xffff));
+#ifdef __mono_ppc64__
+                       if (ins->inst_imm & 0xffffffff00000000ULL)
+                               is_imm = TRUE;
+#endif
+                       if (is_imm) {
                                NEW_INS (cfg, temp, OP_ICONST);
                                temp->inst_c0 = ins->inst_imm;
-                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                               temp->dreg = mono_alloc_ireg (cfg);
                                ins->sreg2 = temp->dreg;
                                ins->opcode = map_to_reg_reg_op (ins->opcode);
                        }
                        break;
+               }
                case OP_ISBB_IMM:
                case OP_IADC_IMM:
                case OP_SBB_IMM:
@@ -2175,12 +2538,13 @@ loop_start:
                case OP_ADC_IMM:
                        NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_imm;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = temp->dreg;
                        ins->opcode = map_to_reg_reg_op (ins->opcode);
                        break;
                case OP_COMPARE_IMM:
                case OP_ICOMPARE_IMM:
+               CASE_PPC64 (OP_LCOMPARE_IMM)
                        next = ins->next;
                        /* Branch opts can eliminate the branch */
                        if (!next || (!(MONO_IS_COND_BRANCH_OP (next) || MONO_IS_COND_EXC (next) || MONO_IS_SETCC (next)))) {
@@ -2192,7 +2556,7 @@ loop_start:
                                if (!ppc_is_uimm16 (ins->inst_imm)) {
                                        NEW_INS (cfg, temp, OP_ICONST);
                                        temp->inst_c0 = ins->inst_imm;
-                                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                                       temp->dreg = mono_alloc_ireg (cfg);
                                        ins->sreg2 = temp->dreg;
                                        ins->opcode = map_to_reg_reg_op (ins->opcode);
                                }
@@ -2200,7 +2564,7 @@ loop_start:
                                if (!ppc_is_imm16 (ins->inst_imm)) {
                                        NEW_INS (cfg, temp, OP_ICONST);
                                        temp->inst_c0 = ins->inst_imm;
-                                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                                       temp->dreg = mono_alloc_ireg (cfg);
                                        ins->sreg2 = temp->dreg;
                                        ins->opcode = map_to_reg_reg_op (ins->opcode);
                                }
@@ -2226,7 +2590,7 @@ loop_start:
                        if (!ppc_is_imm16 (ins->inst_imm)) {
                                NEW_INS (cfg, temp, OP_ICONST);
                                temp->inst_c0 = ins->inst_imm;
-                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                               temp->dreg = mono_alloc_ireg (cfg);
                                ins->sreg2 = temp->dreg;
                                ins->opcode = map_to_reg_reg_op (ins->opcode);
                        }
@@ -2234,12 +2598,13 @@ loop_start:
                case OP_LOCALLOC_IMM:
                        NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_imm;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg1 = temp->dreg;
                        ins->opcode = OP_LOCALLOC;
                        break;
                case OP_LOAD_MEMBASE:
                case OP_LOADI4_MEMBASE:
+               CASE_PPC64 (OP_LOADI8_MEMBASE)
                case OP_LOADU4_MEMBASE:
                case OP_LOADI2_MEMBASE:
                case OP_LOADU2_MEMBASE:
@@ -2248,6 +2613,7 @@ loop_start:
                case OP_LOADR4_MEMBASE:
                case OP_LOADR8_MEMBASE:
                case OP_STORE_MEMBASE_REG:
+               CASE_PPC64 (OP_STOREI8_MEMBASE_REG)
                case OP_STOREI4_MEMBASE_REG:
                case OP_STOREI2_MEMBASE_REG:
                case OP_STOREI1_MEMBASE_REG:
@@ -2262,7 +2628,7 @@ loop_start:
                                break;
                        NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_offset;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg2 = temp->dreg;
                        ins->opcode = map_to_reg_reg_op (ins->opcode);
                        break;
@@ -2270,18 +2636,23 @@ loop_start:
                case OP_STOREI1_MEMBASE_IMM:
                case OP_STOREI2_MEMBASE_IMM:
                case OP_STOREI4_MEMBASE_IMM:
+               CASE_PPC64 (OP_STOREI8_MEMBASE_IMM)
                        NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_imm;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->dreg = mono_alloc_ireg (cfg);
                        ins->sreg1 = temp->dreg;
                        ins->opcode = map_to_reg_reg_op (ins->opcode);
                        last_ins = temp;
                        goto loop_start; /* make it handle the possibly big ins->inst_offset */
                case OP_R8CONST:
                case OP_R4CONST:
+                       if (cfg->compile_aot) {
+                               /* Keep these in the aot case */
+                               break;
+                       }
                        NEW_INS (cfg, temp, OP_ICONST);
-                       temp->inst_c0 = (guint32)ins->inst_p0;
-                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       temp->inst_c0 = (gulong)ins->inst_p0;
+                       temp->dreg = mono_alloc_ireg (cfg);
                        ins->inst_basereg = temp->dreg;
                        ins->inst_offset = 0;
                        ins->opcode = ins->opcode == OP_R4CONST? OP_LOADR4_MEMBASE: OP_LOADR8_MEMBASE;
@@ -2294,35 +2665,58 @@ loop_start:
                last_ins = ins;
        }
        bb->last_ins = last_ins;
-       bb->max_vreg = cfg->rs->next_vreg;
-       
+       bb->max_vreg = cfg->next_vreg;  
 }
 
 static guchar*
 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed)
 {
-       int offset = cfg->arch.fp_conv_var_offset;
+       long offset = cfg->arch.fp_conv_var_offset;     /* frame slot used to move the converted value from ppc_f0 to dreg */
+       long sub_offset;        /* byte offset inside that slot which is loaded back into dreg */
        /* sreg is a float, dreg is an integer reg. ppc_f0 is used a scratch */
-       ppc_fctiwz (code, ppc_f0, sreg);
-       if (ppc_is_imm16 (offset + 4)) {
+#ifdef __mono_ppc64__
+       if (size == 8) {
+               ppc_fctidz (code, ppc_f0, sreg);        /* size == 8 converts with fctidz ... */
+               sub_offset = 0;                         /* ... and reads the result back from the start of the slot */
+       } else
+#endif
+       {
+               ppc_fctiwz (code, ppc_f0, sreg);        /* every other size converts with fctiwz ... */
+               sub_offset = 4;                         /* ... and reads back the word at slot + 4 */
+       }
+       if (ppc_is_imm16 (offset + sub_offset)) {       /* offset fits the 16-bit displacement: address the slot off frame_reg directly */
                ppc_stfd (code, ppc_f0, offset, cfg->frame_reg);
-               ppc_lwz (code, dreg, offset + 4, cfg->frame_reg);
+               if (size == 8)
+                       ppc_ldr (code, dreg, offset + sub_offset, cfg->frame_reg);
+               else
+                       ppc_lwz (code, dreg, offset + sub_offset, cfg->frame_reg);
        } else {
                ppc_load (code, dreg, offset);
                ppc_add (code, dreg, dreg, cfg->frame_reg);
                ppc_stfd (code, ppc_f0, 0, dreg);
-               ppc_lwz (code, dreg, 4, dreg);
+               if (size == 8)
+                       ppc_ldr (code, dreg, sub_offset, dreg); /* dreg holds the slot address here, so only sub_offset is needed */
+               else
+                       ppc_lwz (code, dreg, sub_offset, dreg); /* dreg is both the base and the destination of this load */
        }
        if (!is_signed) {
                if (size == 1)
                        ppc_andid (code, dreg, dreg, 0xff);
                else if (size == 2)
                        ppc_andid (code, dreg, dreg, 0xffff);
+#ifdef __mono_ppc64__
+               else if (size == 4)
+                       ppc_clrldi (code, dreg, dreg, 32);      /* presumably clears the upper 32 bits of dreg -- confirm against the ppc_clrldi macro */
+#endif
        } else {
                if (size == 1)
                        ppc_extsb (code, dreg, dreg);
                else if (size == 2)
                        ppc_extsh (code, dreg, dreg);
+#ifdef __mono_ppc64__
+               else if (size == 4)
+                       ppc_extsw (code, dreg, dreg);           /* presumably sign-extends the low 32-bit word -- confirm against the ppc_extsw macro */
+#endif
        }
        return code;
 }
@@ -2334,10 +2728,13 @@ typedef struct {
        int found;
 } PatchData;
 
-#define is_call_imm(diff) ((gint)(diff) >= -33554432 && (gint)(diff) <= 33554431)
+#define is_call_imm(diff) ((glong)(diff) >= -33554432 && (glong)(diff) <= 33554431)
 
 static int
 search_thunk_slot (void *data, int csize, int bsize, void *user_data) {
+#ifdef __mono_ppc64__
+       g_assert_not_reached ();
+#else
        PatchData *pdata = (PatchData*)user_data;
        guchar *code = data;
        guint32 *thunks = data;
@@ -2354,8 +2751,7 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) {
                return 0;
 
        templ = (guchar*)load;
-       ppc_lis (templ, ppc_r0, (guint32)(pdata->target) >> 16);
-       ppc_ori (templ, ppc_r0, ppc_r0, (guint32)(pdata->target) & 0xffff);
+       ppc_load_sequence (templ, ppc_r0, pdata->target);
 
        //g_print ("thunk nentries: %d\n", ((char*)endthunks - (char*)thunks)/16);
        if ((pdata->found == 2) || (pdata->code >= code && pdata->code <= code + csize)) {
@@ -2363,7 +2759,6 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) {
                        //g_print ("looking for target: %p at %p (%08x-%08x)\n", pdata->target, thunks, thunks [0], thunks [1]);
                        if ((thunks [0] == load [0]) && (thunks [1] == load [1])) {
                                ppc_patch (pdata->code, (guchar*)thunks);
-                               mono_arch_flush_icache (pdata->code, 4);
                                pdata->found = 1;
                                /*{
                                        static int num_thunks = 0;
@@ -2375,14 +2770,13 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) {
                        } else if ((thunks [0] == 0) && (thunks [1] == 0)) {
                                /* found a free slot instead: emit thunk */
                                code = (guchar*)thunks;
-                               ppc_lis (code, ppc_r0, (guint32)(pdata->target) >> 16);
-                               ppc_ori (code, ppc_r0, ppc_r0, (guint32)(pdata->target) & 0xffff);
+                               ppc_lis (code, ppc_r0, (gulong)(pdata->target) >> 16);
+                               ppc_ori (code, ppc_r0, ppc_r0, (gulong)(pdata->target) & 0xffff);
                                ppc_mtctr (code, ppc_r0);
                                ppc_bcctr (code, PPC_BR_ALWAYS, 0);
                                mono_arch_flush_icache ((guchar*)thunks, 16);
 
                                ppc_patch (pdata->code, (guchar*)thunks);
-                               mono_arch_flush_icache (pdata->code, 4);
                                pdata->found = 1;
                                /*{
                                        static int num_thunks = 0;
@@ -2398,6 +2792,7 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) {
                }
                //g_print ("failed thunk lookup for %p from %p at %p (%d entries)\n", pdata->target, pdata->code, data, count);
        }
+#endif
        return 0;
 }
 
@@ -2412,12 +2807,12 @@ handle_thunk (int absolute, guchar *code, const guchar *target) {
        pdata.found = 0;
 
        mono_domain_lock (domain);
-       mono_code_manager_foreach (domain->code_mp, search_thunk_slot, &pdata);
+       mono_domain_code_foreach (domain, search_thunk_slot, &pdata);
 
        if (!pdata.found) {
                /* this uses the first available slot */
                pdata.found = 2;
-               mono_code_manager_foreach (domain->code_mp, search_thunk_slot, &pdata);
+               mono_domain_code_foreach (domain, search_thunk_slot, &pdata);
        }
        mono_domain_unlock (domain);
 
@@ -2426,10 +2821,17 @@ handle_thunk (int absolute, guchar *code, const guchar *target) {
        g_assert (pdata.found == 1);
 }
 
+static void
+patch_ins (guint8 *code, guint32 ins)
+{      /* Overwrite the 32-bit instruction word at CODE with INS and flush it. */
+       *(guint32*)code = GUINT32_TO_BE (ins);  /* INS is passed through GUINT32_TO_BE before being stored */
+       mono_arch_flush_icache (code, 4);       /* flush exactly the 4 bytes written above */
+}
+
 void
-ppc_patch (guchar *code, const guchar *target)
+ppc_patch_full (guchar *code, const guchar *target, gboolean is_fd)
 {
-       guint32 ins = *(guint32*)code;
+       guint32 ins = GUINT32_FROM_BE (*(guint32*)code);
        guint32 prim = ins >> 26;
        guint32 ovf;
 
@@ -2437,31 +2839,32 @@ ppc_patch (guchar *code, const guchar *target)
        if (prim == 18) {
                // prefer relative branches, they are more position independent (e.g. for AOT compilation).
                gint diff = target - code;
+               g_assert (!is_fd);
                if (diff >= 0){
                        if (diff <= 33554431){
                                ins = (18 << 26) | (diff) | (ins & 1);
-                               *(guint32*)code = ins;
+                               patch_ins (code, ins);
                                return;
                        }
                } else {
                        /* diff between 0 and -33554432 */
                        if (diff >= -33554432){
                                ins = (18 << 26) | (diff & ~0xfc000000) | (ins & 1);
-                               *(guint32*)code = ins;
+                               patch_ins (code, ins);
                                return;
                        }
                }
                
                if ((glong)target >= 0){
                        if ((glong)target <= 33554431){
-                               ins = (18 << 26) | ((guint32) target) | (ins & 1) | 2;
-                               *(guint32*)code = ins;
+                               ins = (18 << 26) | ((gulong) target) | (ins & 1) | 2;
+                               patch_ins (code, ins);
                                return;
                        }
                } else {
                        if ((glong)target >= -33554432){
-                               ins = (18 << 26) | (((guint32)target) & ~0xfc000000) | (ins & 1) | 2;
-                               *(guint32*)code = ins;
+                               ins = (18 << 26) | (((gulong)target) & ~0xfc000000) | (ins & 1) | 2;
+                               patch_ins (code, ins);
                                return;
                        }
                }
@@ -2474,9 +2877,10 @@ ppc_patch (guchar *code, const guchar *target)
        
        
        if (prim == 16) {
+               g_assert (!is_fd);
                // absolute address
                if (ins & 2) {
-                       guint32 li = (guint32)target;
+                       guint32 li = (gulong)target;
                        ins = (ins & 0xffff0000) | (ins & 3);
                        ovf  = li & 0xffff0000;
                        if (ovf != 0 && ovf != 0xffff0000)
@@ -2493,11 +2897,51 @@ ppc_patch (guchar *code, const guchar *target)
                        diff &= 0xffff;
                        ins |= diff;
                }
-               *(guint32*)code = ins;
+               patch_ins (code, ins);
                return;
        }
 
        if (prim == 15 || ins == 0x4e800021 || ins == 0x4e800020 || ins == 0x4e800420) {
+#ifdef __mono_ppc64__
+               guint32 *seq = (guint32*)code;
+               guint32 *branch_ins;
+
+               /* the trampoline code will try to patch the blrl, blr, bcctr */
+               if (ins == 0x4e800021 || ins == 0x4e800020 || ins == 0x4e800420) {
+                       branch_ins = seq;
+                       if (ppc_opcode (seq [-3]) == 58 || ppc_opcode (seq [-3]) == 31) /* ld || mr */
+                               code -= 32;
+                       else
+                               code -= 24;
+               } else {
+                       if (ppc_opcode (seq [5]) == 58 || ppc_opcode (seq [5]) == 31) /* ld || mr */
+                               branch_ins = seq + 8;
+                       else
+                               branch_ins = seq + 6;
+               }
+
+               seq = (guint32*)code;
+               /* this is the lis/ori/sldi/oris/ori/(ld/ld|mr/nop)/mtlr/blrl sequence */
+               g_assert (mono_ppc_is_direct_call_sequence (branch_ins));
+
+               if (ppc_opcode (seq [5]) == 58) {       /* ld */
+                       g_assert (ppc_opcode (seq [6]) == 58); /* ld */
+
+                       if (!is_fd) {
+                               guint8 *buf = (guint8*)&seq [5];
+                               ppc_mr (buf, ppc_r0, ppc_r11);
+                               ppc_nop (buf);
+                       }
+               } else {
+                       if (is_fd)
+                               target = mono_get_addr_from_ftnptr ((gpointer)target);
+               }
+
+               /* FIXME: make this thread safe */
+               /* FIXME: we're assuming we're using r11 here */
+               ppc_load_ptr_sequence (code, ppc_r11, target);
+               mono_arch_flush_icache ((guint8*)seq, 28);
+#else
                guint32 *seq;
                /* the trampoline code will try to patch the blrl, blr, bcctr */
                if (ins == 0x4e800021 || ins == 0x4e800020 || ins == 0x4e800420) {
@@ -2513,12 +2957,25 @@ ppc_patch (guchar *code, const guchar *target)
                ppc_lis (code, ppc_r0, (guint32)(target) >> 16);
                ppc_ori (code, ppc_r0, ppc_r0, (guint32)(target) & 0xffff);
                mono_arch_flush_icache (code - 8, 8);
+#endif
        } else {
                g_assert_not_reached ();
        }
 //     g_print ("patched with 0x%08x\n", ins);
 }
 
+void
+ppc_patch (guchar *code, const guchar *target)
+{
+       ppc_patch_full (code, target, FALSE);
+}
+/*
+ * mono_ppc_patch:
+ *
+ *   Forwards CODE and TARGET unchanged to ppc_patch ().
+ */
+void
+mono_ppc_patch (guchar *code, const guchar *target)
+{
+       ppc_patch (code, target);
+}
+
 static guint8*
 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
 {
@@ -2550,8 +3007,6 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
        guint32 i, pos;
        int struct_index = 0;
 
-       /* FIXME: Generate intermediate code instead */
-
        sig = mono_method_signature (method);
 
        /* This is the opposite of the code in emit_prolog */
@@ -2564,7 +3019,7 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
                ArgInfo *ainfo = &cinfo->ret;
                inst = cfg->vret_addr;
                g_assert (ppc_is_imm16 (inst->inst_offset));
-               ppc_lwz (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+               ppc_ldptr (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
        }
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ArgInfo *ainfo = cinfo->args + i;
@@ -2582,9 +3037,14 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
                                case 2:
                                        ppc_lhz (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                                        break;
-                               default:
+#ifdef __mono_ppc64__
+                               case 4:
                                        ppc_lwz (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                                        break;
+#endif
+                               default:
+                                       ppc_ldptr (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       break;
                        }
                        break;
 
@@ -2605,16 +3065,28 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
                        MonoType *type = mini_type_get_underlying_type (cfg->generic_sharing_context,
                                &inst->klass->byval_arg);
 
-                       if (!MONO_TYPE_IS_REFERENCE (type) && type->type != MONO_TYPE_I4)
+#ifndef __mono_ppc64__
+                       if (type->type == MONO_TYPE_I8)
                                NOT_IMPLEMENTED;
+#endif
+
+                       if (MONO_TYPE_IS_REFERENCE (type) || type->type == MONO_TYPE_I8) {
+                               ppc_ldptr (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
+                               ppc_stptr (code, ppc_r0, ainfo->offset, ainfo->reg);
+                       } else if (type->type == MONO_TYPE_I4) {
+                               ppc_lwz (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
+                               ppc_stw (code, ppc_r0, ainfo->offset, ainfo->reg);
+                       } else {
+                               NOT_IMPLEMENTED;
+                       }
 
-                       ppc_lwz (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
-                       ppc_stw (code, ppc_r0, ainfo->offset, ainfo->reg);
                        break;
                }
 
                case RegTypeStructByVal: {
+#ifdef __APPLE__
                        guint32 size = 0;
+#endif
                        int j;
 
                        /* FIXME: */
@@ -2632,9 +3104,13 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
                                NOT_IMPLEMENTED;
                        } else
 #endif
-                               for (j = 0; j < ainfo->size; ++j) {
-                                       ppc_lwz (code, ainfo->reg  + j,
-                                               inst->inst_offset + j * sizeof (gpointer), inst->inst_basereg);
+                               for (j = 0; j < ainfo->vtregs; ++j) {
+                                       ppc_ldptr (code, ainfo->reg + j,
+                                                       inst->inst_offset + j * sizeof (gpointer),
+                                                       inst->inst_basereg);
+                                       /* FIXME: shift to the right */
+                                       if (ainfo->bytes)
+                                               NOT_IMPLEMENTED;
                                }
                        break;
                }
@@ -2644,7 +3120,7 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
 
                        g_assert (ppc_is_imm16 (addr->inst_offset));
                        g_assert (!ainfo->offset);
-                       ppc_lwz (code, ainfo->reg, addr->inst_offset, addr->inst_basereg);
+                       ppc_ldptr (code, ainfo->reg, addr->inst_offset, addr->inst_basereg);
 
                        struct_index++;
                        break;
@@ -2715,7 +3191,7 @@ ins_native_length (MonoCompile *cfg, MonoInst *ins)
 static guint8*
 emit_reserve_param_area (MonoCompile *cfg, guint8 *code)
 {
-       int size = cfg->param_area;
+       long size = cfg->param_area;
 
        size += MONO_ARCH_FRAME_ALIGNMENT - 1;
        size &= -MONO_ARCH_FRAME_ALIGNMENT;
@@ -2723,12 +3199,12 @@ emit_reserve_param_area (MonoCompile *cfg, guint8 *code)
        if (!size)
                return code;
 
-       ppc_lwz (code, ppc_r0, 0, ppc_sp);
+       ppc_ldptr (code, ppc_r0, 0, ppc_sp);
        if (ppc_is_imm16 (-size)) {
-               ppc_stwu (code, ppc_r0, -size, ppc_sp);
+               ppc_stptr_update (code, ppc_r0, -size, ppc_sp);
        } else {
                ppc_load (code, ppc_r11, -size);
-               ppc_stwux (code, ppc_r0, ppc_sp, ppc_r11);
+               ppc_stptr_update_indexed (code, ppc_r0, ppc_sp, ppc_r11);
        }
 
        return code;
@@ -2737,7 +3213,7 @@ emit_reserve_param_area (MonoCompile *cfg, guint8 *code)
 static guint8*
 emit_unreserve_param_area (MonoCompile *cfg, guint8 *code)
 {
-       int size = cfg->param_area;
+       long size = cfg->param_area;
 
        size += MONO_ARCH_FRAME_ALIGNMENT - 1;
        size &= -MONO_ARCH_FRAME_ALIGNMENT;
@@ -2745,17 +3221,19 @@ emit_unreserve_param_area (MonoCompile *cfg, guint8 *code)
        if (!size)
                return code;
 
-       ppc_lwz (code, ppc_r0, 0, ppc_sp);
+       ppc_ldptr (code, ppc_r0, 0, ppc_sp);
        if (ppc_is_imm16 (size)) {
-               ppc_stwu (code, ppc_r0, size, ppc_sp);
+               ppc_stptr_update (code, ppc_r0, size, ppc_sp);
        } else {
                ppc_load (code, ppc_r11, size);
-               ppc_stwux (code, ppc_r0, ppc_sp, ppc_r11);
+               ppc_stptr_update_indexed (code, ppc_r0, ppc_sp, ppc_r11);
        }
 
        return code;
 }
 
+#define MASK_SHIFT_IMM(i)      ((i) & MONO_PPC_32_64_CASE (0x1f, 0x3f))
+
 void
 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 {
@@ -2766,6 +3244,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
        MonoInst *last_ins = NULL;
        guint last_offset = 0;
        int max_len, cpos;
+       int L;
 
        /* we don't align basic blocks of loops on ppc */
 
@@ -2799,7 +3278,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
        //              g_print ("cil code\n");
                mono_debug_record_line_number (cfg, ins, offset);
 
-               switch (ins->opcode) {
+               switch (normalize_opcode (ins->opcode)) {
                case OP_RELAXED_NOP:
                case OP_NOP:
                case OP_DUMMY_USE:
@@ -2807,6 +3286,33 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_NOT_REACHED:
                case OP_NOT_NULL:
                        break;
+               case OP_SEQ_POINT: {
+                       int i;
+
+                       if (cfg->compile_aot)
+                               NOT_IMPLEMENTED;
+
+                       /* 
+                        * Read from the single stepping trigger page. This will cause a
+                        * SIGSEGV when single stepping is enabled.
+                        * We do this _before_ the breakpoint, so single stepping after
+                        * a breakpoint is hit will step to the next IL offset.
+                        */
+                       if (ins->flags & MONO_INST_SINGLE_STEP_LOC) {
+                               ppc_load (code, ppc_r11, (gsize)ss_trigger_page);
+                               ppc_ldptr (code, ppc_r11, 0, ppc_r11);
+                       }
+
+                       mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
+
+                       /* 
+                        * A placeholder for a possible breakpoint inserted by
+                        * mono_arch_set_breakpoint ().
+                        */
+                       for (i = 0; i < BREAKPOINT_SIZE / 4; ++i)
+                               ppc_nop (code);
+                       break;
+               }
                case OP_TLS_GET:
                        emit_tls_access (code, ins->dreg, ins->inst_offset);
                        break;
@@ -2827,48 +3333,102 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_stb (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_stbx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_stb (code, ins->sreg1, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_stbx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_STOREI2_MEMBASE_REG:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_sth (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_sthx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_sth (code, ins->sreg1, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_sthx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_STORE_MEMBASE_REG:
-               case OP_STOREI4_MEMBASE_REG:
                        if (ppc_is_imm16 (ins->inst_offset)) {
-                               ppc_stw (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
+                               ppc_stptr (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
+                       } else {
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_stptr (code, ins->sreg1, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_stptr_indexed (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               }
+                       }
+                       break;
+#ifdef __mono_ilp32__
+               case OP_STOREI8_MEMBASE_REG:
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_str (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
                                ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_stwx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               ppc_str_indexed (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
                        }
                        break;
+#endif
                case OP_STOREI1_MEMINDEX:
-                       ppc_stbx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       ppc_stbx (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
                case OP_STOREI2_MEMINDEX:
-                       ppc_sthx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       ppc_sthx (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
                case OP_STORE_MEMINDEX:
-               case OP_STOREI4_MEMINDEX:
-                       ppc_stwx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       ppc_stptr_indexed (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
                case OP_LOADU4_MEM:
                        g_assert_not_reached ();
                        break;
                case OP_LOAD_MEMBASE:
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_ldptr (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
+                       } else {
+                               if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) {
+                                       ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset));
+                                       ppc_ldptr (code, ins->dreg, ins->inst_offset, ins->dreg);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_ldptr_indexed (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               }
+                       }
+                       break;
                case OP_LOADI4_MEMBASE:
+#ifdef __mono_ppc64__
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_lwa (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
+                       } else {
+                               if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) {
+                                       ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset));
+                                       ppc_lwa (code, ins->dreg, ins->inst_offset, ins->dreg);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lwax (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               }
+                       }
+                       break;
+#endif
                case OP_LOADU4_MEMBASE:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lwz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_lwzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) {
+                                       ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset));
+                                       ppc_lwz (code, ins->dreg, ins->inst_offset, ins->dreg);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lwzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_LOADI1_MEMBASE:
@@ -2876,8 +3436,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lbz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_lbzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) {
+                                       ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset));
+                                       ppc_lbz (code, ins->dreg, ins->inst_offset, ins->dreg);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lbzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               }
                        }
                        if (ins->opcode == OP_LOADI1_MEMBASE)
                                ppc_extsb (code, ins->dreg, ins->dreg);
@@ -2886,68 +3451,102 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lhz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_lhzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) {
+                                       ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset));
+                                       ppc_lhz (code, ins->dreg, ins->inst_offset, ins->dreg);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lhzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_LOADI2_MEMBASE:
                        if (ppc_is_imm16 (ins->inst_offset)) {
-                               ppc_lha (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+                               ppc_lha (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
+                       } else {
+                               if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) {
+                                       ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset));
+                                       ppc_lha (code, ins->dreg, ins->inst_offset, ins->dreg);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lhax (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               }
+                       }
+                       break;
+#ifdef __mono_ilp32__
+               case OP_LOADI8_MEMBASE:
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_ldr (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
                                ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_lhax (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               ppc_ldr_indexed (code, ins->dreg, ins->inst_basereg, ppc_r0);
                        }
                        break;
+#endif
                case OP_LOAD_MEMINDEX:
+                       ppc_ldptr_indexed (code, ins->dreg, ins->inst_basereg, ins->sreg2);
+                       break;
                case OP_LOADI4_MEMINDEX:
+#ifdef __mono_ppc64__
+                       ppc_lwax (code, ins->dreg, ins->inst_basereg, ins->sreg2);
+                       break;
+#endif
                case OP_LOADU4_MEMINDEX:
-                       ppc_lwzx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lwzx (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADU2_MEMINDEX:
-                       ppc_lhzx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lhzx (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADI2_MEMINDEX:
-                       ppc_lhax (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lhax (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADU1_MEMINDEX:
-                       ppc_lbzx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lbzx (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADI1_MEMINDEX:
-                       ppc_lbzx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lbzx (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        ppc_extsb (code, ins->dreg, ins->dreg);
                        break;
                case OP_ICONV_TO_I1:
+               CASE_PPC64 (OP_LCONV_TO_I1)
                        ppc_extsb (code, ins->dreg, ins->sreg1);
                        break;
                case OP_ICONV_TO_I2:
+               CASE_PPC64 (OP_LCONV_TO_I2)
                        ppc_extsh (code, ins->dreg, ins->sreg1);
                        break;
                case OP_ICONV_TO_U1:
-                       ppc_rlwinm (code, ins->dreg, ins->sreg1, 0, 24, 31);
+               CASE_PPC64 (OP_LCONV_TO_U1)
+                       ppc_clrlwi (code, ins->dreg, ins->sreg1, 24);
                        break;
                case OP_ICONV_TO_U2:
-                       ppc_rlwinm (code, ins->dreg, ins->sreg1, 0, 16, 31);
+               CASE_PPC64 (OP_LCONV_TO_U2)
+                       ppc_clrlwi (code, ins->dreg, ins->sreg1, 16);
                        break;
                case OP_COMPARE:
                case OP_ICOMPARE:
+               CASE_PPC64 (OP_LCOMPARE)
+                       L = (sizeof (mgreg_t) == 4 || ins->opcode == OP_ICOMPARE) ? 0 : 1;
                        next = ins->next;
                        if (next && compare_opcode_is_unsigned (next->opcode))
-                               ppc_cmpl (code, 0, 0, ins->sreg1, ins->sreg2);
+                               ppc_cmpl (code, 0, L, ins->sreg1, ins->sreg2);
                        else
-                               ppc_cmp (code, 0, 0, ins->sreg1, ins->sreg2);
+                               ppc_cmp (code, 0, L, ins->sreg1, ins->sreg2);
                        break;
                case OP_COMPARE_IMM:
                case OP_ICOMPARE_IMM:
+               CASE_PPC64 (OP_LCOMPARE_IMM)
+                       L = (sizeof (mgreg_t) == 4 || ins->opcode == OP_ICOMPARE_IMM) ? 0 : 1;
                        next = ins->next;
                        if (next && compare_opcode_is_unsigned (next->opcode)) {
                                if (ppc_is_uimm16 (ins->inst_imm)) {
-                                       ppc_cmpli (code, 0, 0, ins->sreg1, (ins->inst_imm & 0xffff));
+                                       ppc_cmpli (code, 0, L, ins->sreg1, (ins->inst_imm & 0xffff));
                                } else {
                                        g_assert_not_reached ();
                                }
                        } else {
                                if (ppc_is_imm16 (ins->inst_imm)) {
-                                       ppc_cmpi (code, 0, 0, ins->sreg1, (ins->inst_imm & 0xffff));
+                                       ppc_cmpi (code, 0, L, ins->sreg1, (ins->inst_imm & 0xffff));
                                } else {
                                        g_assert_not_reached ();
                                }
@@ -2958,9 +3557,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_ADDCC:
                case OP_IADDCC:
-                       ppc_addc (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       ppc_addco (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_IADD:
+               CASE_PPC64 (OP_LADD)
                        ppc_add (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_ADC:
@@ -2976,6 +3576,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_ADD_IMM:
                case OP_IADD_IMM:
+               CASE_PPC64 (OP_LADD_IMM)
                        if (ppc_is_imm16 (ins->inst_imm)) {
                                ppc_addi (code, ins->dreg, ins->sreg1, ins->inst_imm);
                        } else {
@@ -2999,6 +3600,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
                        break;
                case OP_ISUB_OVF:
+               CASE_PPC64 (OP_LSUB_OVF)
                        /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
                         */
                        ppc_subfo (code, ins->dreg, ins->sreg2, ins->sreg1);
@@ -3007,6 +3609,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
                        break;
                case OP_ISUB_OVF_UN:
+               CASE_PPC64 (OP_LSUB_OVF_UN)
                        /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
                         */
                        ppc_subfc (code, ins->dreg, ins->sreg2, ins->sreg1);
@@ -3048,9 +3651,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_SUBCC:
                case OP_ISUBCC:
-                       ppc_subfc (code, ins->dreg, ins->sreg2, ins->sreg1);
+                       ppc_subfco (code, ins->dreg, ins->sreg2, ins->sreg1);
                        break;
                case OP_ISUB:
+               CASE_PPC64 (OP_LSUB)
                        ppc_subf (code, ins->dreg, ins->sreg2, ins->sreg1);
                        break;
                case OP_SBB:
@@ -3059,6 +3663,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_SUB_IMM:
                case OP_ISUB_IMM:
+               CASE_PPC64 (OP_LSUB_IMM)
                        // we add the negated value
                        if (ppc_is_imm16 (-ins->inst_imm))
                                ppc_addi (code, ins->dreg, ins->sreg1, -ins->inst_imm);
@@ -3074,11 +3679,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_subfze (code, ins->dreg, ins->sreg1);
                        break;
                case OP_IAND:
+               CASE_PPC64 (OP_LAND)
                        /* FIXME: the ppc macros are inconsistent here: put dest as the first arg! */
                        ppc_and (code, ins->sreg1, ins->dreg, ins->sreg2);
                        break;
                case OP_AND_IMM:
                case OP_IAND_IMM:
+               CASE_PPC64 (OP_LAND_IMM)
                        if (!(ins->inst_imm & 0xffff0000)) {
                                ppc_andid (code, ins->sreg1, ins->dreg, ins->inst_imm);
                        } else if (!(ins->inst_imm & 0xffff)) {
@@ -3087,27 +3694,43 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                g_assert_not_reached ();
                        }
                        break;
-               case OP_IDIV: {
+               case OP_IDIV:
+               CASE_PPC64 (OP_LDIV) {
                        guint8 *divisor_is_m1;
                          /* XER format: SO, OV, CA, reserved [21 bits], count [8 bits]
                          */
-                       ppc_cmpi (code, 0, 0, ins->sreg2, -1);
+                       ppc_compare_reg_imm (code, 0, ins->sreg2, -1);
                        divisor_is_m1 = code;
                        ppc_bc (code, PPC_BR_FALSE | PPC_BR_LIKELY, PPC_BR_EQ, 0);
                        ppc_lis (code, ppc_r0, 0x8000);
-                       ppc_cmp (code, 0, 0, ins->sreg1, ppc_r0);
+#ifdef __mono_ppc64__
+                       if (ins->opcode == OP_LDIV)
+                               ppc_sldi (code, ppc_r0, ppc_r0, 32);
+#endif
+                       ppc_compare (code, 0, ins->sreg1, ppc_r0);
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_EQ, "ArithmeticException");
                        ppc_patch (divisor_is_m1, code);
                         /* XER format: SO, OV, CA, reserved [21 bits], count [8 bits]
                         */
-                       ppc_divwod (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       if (ins->opcode == OP_IDIV)
+                               ppc_divwod (code, ins->dreg, ins->sreg1, ins->sreg2);
+#ifdef __mono_ppc64__
+                       else
+                               ppc_divdod (code, ins->dreg, ins->sreg1, ins->sreg2);
+#endif
                        ppc_mfspr (code, ppc_r0, ppc_xer);
                        ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
                        break;
                }
                case OP_IDIV_UN:
-                       ppc_divwuod (code, ins->dreg, ins->sreg1, ins->sreg2);
+               CASE_PPC64 (OP_LDIV_UN)
+                       if (ins->opcode == OP_IDIV_UN)
+                               ppc_divwuod (code, ins->dreg, ins->sreg1, ins->sreg2);
+#ifdef __mono_ppc64__
+                       else
+                               ppc_divduod (code, ins->dreg, ins->sreg1, ins->sreg2);
+#endif
                        ppc_mfspr (code, ppc_r0, ppc_xer);
                        ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
@@ -3118,10 +3741,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_REM_IMM:
                        g_assert_not_reached ();
                case OP_IOR:
+               CASE_PPC64 (OP_LOR)
                        ppc_or (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_OR_IMM:
                case OP_IOR_IMM:
+               CASE_PPC64 (OP_LOR_IMM)
                        if (!(ins->inst_imm & 0xffff0000)) {
                                ppc_ori (code, ins->sreg1, ins->dreg, ins->inst_imm);
                        } else if (!(ins->inst_imm & 0xffff)) {
@@ -3131,10 +3756,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                case OP_IXOR:
+               CASE_PPC64 (OP_LXOR)
                        ppc_xor (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_IXOR_IMM:
                case OP_XOR_IMM:
+               CASE_PPC64 (OP_LXOR_IMM)
                        if (!(ins->inst_imm & 0xffff0000)) {
                                ppc_xori (code, ins->sreg1, ins->dreg, ins->inst_imm);
                        } else if (!(ins->inst_imm & 0xffff)) {
@@ -3144,23 +3771,23 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                case OP_ISHL:
-                       ppc_slw (code, ins->sreg1, ins->dreg, ins->sreg2);
+               CASE_PPC64 (OP_LSHL)
+                       ppc_shift_left (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_SHL_IMM:
                case OP_ISHL_IMM:
-                       ppc_rlwinm (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f), 0, (31 - (ins->inst_imm & 0x1f)));
+               CASE_PPC64 (OP_LSHL_IMM)
+                       ppc_shift_left_imm (code, ins->dreg, ins->sreg1, MASK_SHIFT_IMM (ins->inst_imm));
                        break;
                case OP_ISHR:
                        ppc_sraw (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_SHR_IMM:
-               case OP_ISHR_IMM:
-                       ppc_srawi (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
+                       ppc_shift_right_arith_imm (code, ins->dreg, ins->sreg1, MASK_SHIFT_IMM (ins->inst_imm));
                        break;
                case OP_SHR_UN_IMM:
-               case OP_ISHR_UN_IMM:
-                       if (ins->inst_imm)
-                               ppc_rlwinm (code, ins->dreg, ins->sreg1, (32 - (ins->inst_imm & 0x1f)), (ins->inst_imm & 0x1f), 31);
+                       if (MASK_SHIFT_IMM (ins->inst_imm))
+                               ppc_shift_right_imm (code, ins->dreg, ins->sreg1, MASK_SHIFT_IMM (ins->inst_imm));
                        else
                                ppc_mr (code, ins->dreg, ins->sreg1);
                        break;
@@ -3168,16 +3795,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_srw (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_INOT:
+               CASE_PPC64 (OP_LNOT)
                        ppc_not (code, ins->dreg, ins->sreg1);
                        break;
                case OP_INEG:
+               CASE_PPC64 (OP_LNEG)
                        ppc_neg (code, ins->dreg, ins->sreg1);
                        break;
                case OP_IMUL:
-                       ppc_mullw (code, ins->dreg, ins->sreg1, ins->sreg2);
+               CASE_PPC64 (OP_LMUL)
+                       ppc_multiply (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_IMUL_IMM:
                case OP_MUL_IMM:
+               CASE_PPC64 (OP_LMUL_IMM)
                        if (ppc_is_imm16 (ins->inst_imm)) {
                            ppc_mulli (code, ins->dreg, ins->sreg1, ins->inst_imm);
                        } else {
@@ -3185,34 +3816,62 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                case OP_IMUL_OVF:
+               CASE_PPC64 (OP_LMUL_OVF)
                        /* we cannot use mcrxr, since it's not implemented on some processors
                         * XER format: SO, OV, CA, reserved [21 bits], count [8 bits]
                         */
-                       ppc_mullwo (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       if (ins->opcode == OP_IMUL_OVF)
+                               ppc_mullwo (code, ins->dreg, ins->sreg1, ins->sreg2);
+#ifdef __mono_ppc64__
+                       else
+                               ppc_mulldo (code, ins->dreg, ins->sreg1, ins->sreg2);
+#endif
                        ppc_mfspr (code, ppc_r0, ppc_xer);
                        ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
                        break;
                case OP_IMUL_OVF_UN:
+               CASE_PPC64 (OP_LMUL_OVF_UN)
                        /* we first multiply to get the high word and compare to 0
                         * to set the flags, then the result is discarded and then 
                         * we multiply to get the lower * bits result
                         */
-                       ppc_mulhwu (code, ppc_r0, ins->sreg1, ins->sreg2);
+                       if (ins->opcode == OP_IMUL_OVF_UN)
+                               ppc_mulhwu (code, ppc_r0, ins->sreg1, ins->sreg2);
+#ifdef __mono_ppc64__
+                       else
+                               ppc_mulhdu (code, ppc_r0, ins->sreg1, ins->sreg2);
+#endif
                        ppc_cmpi (code, 0, 0, ppc_r0, 0);
                        EMIT_COND_SYSTEM_EXCEPTION (CEE_BNE_UN - CEE_BEQ, "OverflowException");
-                       ppc_mullw (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       ppc_multiply (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_ICONST:
                        ppc_load (code, ins->dreg, ins->inst_c0);
                        break;
+               case OP_I8CONST: {
+                       ppc_load (code, ins->dreg, ins->inst_l);
+                       break;
+               }
+               case OP_LOAD_GOTADDR:
+                       /* The PLT implementation depends on this */
+                       g_assert (ins->dreg == ppc_r30);
+
+                       code = mono_arch_emit_load_got_addr (cfg->native_code, code, cfg, NULL);
+                       break;
+               case OP_GOT_ENTRY:
+                       // FIXME: Fix max instruction length
+                       mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
+                       /* arch_emit_got_access () patches this */
+                       ppc_load32 (code, ppc_r0, 0);
+                       ppc_ldptr_indexed (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                       break;
                case OP_AOTCONST:
                        mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
-                       ppc_lis (code, ins->dreg, 0);
-                       ppc_ori (code, ins->dreg, ins->dreg, 0);
+                       ppc_load_sequence (code, ins->dreg, 0);
                        break;
-               case OP_ICONV_TO_I4:
-               case OP_ICONV_TO_U4:
+               CASE_PPC32 (OP_ICONV_TO_I4)
+               CASE_PPC32 (OP_ICONV_TO_U4)
                case OP_MOVE:
                        ppc_mr (code, ins->dreg, ins->sreg1);
                        break;
@@ -3235,7 +3894,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_frsp (code, ins->dreg, ins->sreg1);
                        break;
                case OP_JMP: {
-                       int i, pos = 0;
+                       int i, pos;
                        
                        /*
                         * Keep in sync with mono_arch_emit_epilog
@@ -3246,11 +3905,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                         * we're leaving the method.
                         */
                        if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
-                               if (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET)) {
-                                       ppc_lwz (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, cfg->frame_reg);
+                               long ret_offset = cfg->stack_usage + PPC_RET_ADDR_OFFSET;
+                               if (ppc_is_imm16 (ret_offset)) {
+                                       ppc_ldptr (code, ppc_r0, ret_offset, cfg->frame_reg);
                                } else {
-                                       ppc_load (code, ppc_r11, cfg->stack_usage + PPC_RET_ADDR_OFFSET);
-                                       ppc_lwzx (code, ppc_r0, cfg->frame_reg, ppc_r11);
+                                       ppc_load (code, ppc_r11, ret_offset);
+                                       ppc_ldptr_indexed (code, ppc_r0, cfg->frame_reg, ppc_r11);
                                }
                                ppc_mtlr (code, ppc_r0);
                        }
@@ -3258,10 +3918,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        code = emit_load_volatile_arguments (cfg, code);
 
                        if (ppc_is_imm16 (cfg->stack_usage)) {
-                               ppc_addic (code, ppc_sp, cfg->frame_reg, cfg->stack_usage);
+                               ppc_addi (code, ppc_r11, cfg->frame_reg, cfg->stack_usage);
                        } else {
-                               ppc_load (code, ppc_r11, cfg->stack_usage);
-                               ppc_add (code, ppc_sp, cfg->frame_reg, ppc_r11);
+                               /* cfg->stack_usage is an int, so we can use
+                                * an addis/addi sequence here even in 64-bit.  */
+                               ppc_addis (code, ppc_r11, cfg->frame_reg, ppc_ha(cfg->stack_usage));
+                               ppc_addi (code, ppc_r11, ppc_r11, cfg->stack_usage);
                        }
                        if (!cfg->method->save_lmf) {
                                /*for (i = 31; i >= 14; --i) {
@@ -3270,32 +3932,47 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                                ppc_lfd (code, i, -pos, cfg->frame_reg);
                                        }
                                }*/
-                               /* FIXME: restore registers before changing ppc_sp */
+                               pos = 0;
                                for (i = 31; i >= 13; --i) {
                                        if (cfg->used_int_regs & (1 << i)) {
-                                               pos += sizeof (gulong);
-                                               ppc_lwz (code, i, -pos, ppc_sp);
+                                               pos += sizeof (gpointer);
+                                               ppc_ldptr (code, i, -pos, ppc_r11);
                                        }
                                }
                        } else {
                                /* FIXME restore from MonoLMF: though this can't happen yet */
                        }
+                       ppc_mr (code, ppc_sp, ppc_r11);
                        mono_add_patch_info (cfg, (guint8*) code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
-                       ppc_b (code, 0);
+                       if (cfg->compile_aot) {
+                               /* arch_emit_got_access () patches this */
+                               ppc_load32 (code, ppc_r0, 0);
+#ifdef PPC_USES_FUNCTION_DESCRIPTOR
+                               ppc_ldptr_indexed (code, ppc_r11, ppc_r30, ppc_r0);
+                               ppc_ldptr (code, ppc_r0, 0, ppc_r11);
+#else
+                               ppc_ldptr_indexed (code, ppc_r0, ppc_r30, ppc_r0);
+#endif
+                               ppc_mtctr (code, ppc_r0);
+                               ppc_bcctr (code, PPC_BR_ALWAYS, 0);
+                       } else {
+                               ppc_b (code, 0);
+                       }
                        break;
                }
                case OP_CHECK_THIS:
                        /* ensure ins->sreg1 is not NULL */
-                       ppc_lwz (code, ppc_r0, 0, ins->sreg1);
+                       ppc_ldptr (code, ppc_r0, 0, ins->sreg1);
                        break;
                case OP_ARGLIST: {
-                       if (ppc_is_imm16 (cfg->sig_cookie + cfg->stack_usage)) {
-                               ppc_addi (code, ppc_r0, cfg->frame_reg, cfg->sig_cookie + cfg->stack_usage);
+                       long cookie_offset = cfg->sig_cookie + cfg->stack_usage;
+                       if (ppc_is_imm16 (cookie_offset)) {
+                               ppc_addi (code, ppc_r0, cfg->frame_reg, cookie_offset);
                        } else {
-                               ppc_load (code, ppc_r0, cfg->sig_cookie + cfg->stack_usage);
+                               ppc_load (code, ppc_r0, cookie_offset);
                                ppc_add (code, ppc_r0, cfg->frame_reg, ppc_r0);
                        }
-                       ppc_stw (code, ppc_r0, 0, ins->sreg1);
+                       ppc_stptr (code, ppc_r0, 0, ins->sreg1);
                        break;
                }
                case OP_FCALL:
@@ -3309,9 +3986,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD, call->method);
                        else
                                mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_ABS, call->fptr);
-                       if (FORCE_INDIR_CALL || cfg->method->dynamic) {
-                               ppc_lis (code, ppc_r0, 0);
-                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                       if ((FORCE_INDIR_CALL || cfg->method->dynamic) && !cfg->compile_aot) {
+                               ppc_load_func (code, ppc_r0, 0);
                                ppc_mtlr (code, ppc_r0);
                                ppc_blrl (code);
                        } else {
@@ -3326,7 +4002,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_VCALL2_REG:
                case OP_VOIDCALL_REG:
                case OP_CALL_REG:
+#ifdef PPC_USES_FUNCTION_DESCRIPTOR
+                       ppc_ldptr (code, ppc_r0, 0, ins->sreg1);
+                       /* FIXME: if we know that this is a method, we
+                          can omit this load */
+                       ppc_ldptr (code, ppc_r2, 8, ins->sreg1);
+                       ppc_mtlr (code, ppc_r0);
+#else
                        ppc_mtlr (code, ins->sreg1);
+#endif
                        ppc_blrl (code);
                        /* FIXME: this should be handled somewhere else in the new jit */
                        code = emit_move_return_value (cfg, ins, code);
@@ -3337,7 +4021,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_VCALL2_MEMBASE:
                case OP_VOIDCALL_MEMBASE:
                case OP_CALL_MEMBASE:
-                       ppc_lwz (code, ppc_r0, ins->inst_offset, ins->sreg1);
+                       if (cfg->compile_aot && ins->sreg1 == ppc_r11) {
+                               /* The trampolines clobber this */
+                               ppc_mr (code, ppc_r29, ins->sreg1);
+                               ppc_ldptr (code, ppc_r0, ins->inst_offset, ppc_r29);
+                       } else {
+                               ppc_ldptr (code, ppc_r0, ins->inst_offset, ins->sreg1);
+                       }
                        ppc_mtlr (code, ppc_r0);
                        ppc_blrl (code);
                        /* FIXME: this should be handled somewhere else in the new jit */
@@ -3350,7 +4040,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        int area_offset = alloca_waste;
                        area_offset &= ~31;
                        ppc_addi (code, ppc_r11, ins->sreg1, alloca_waste + 31);
-                       ppc_rlwinm (code, ppc_r11, ppc_r11, 0, 0, 27);
+                       /* FIXME: should be calculated from MONO_ARCH_FRAME_ALIGNMENT */
+                       ppc_clear_right_imm (code, ppc_r11, ppc_r11, 4);
                        /* use ctr to store the number of words to 0 if needed */
                        if (ins->flags & MONO_INST_INIT) {
                                /* we zero 4 bytes at a time:
@@ -3359,13 +4050,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                 * it negative and iterate billions of times.
                                 */
                                ppc_addi (code, ppc_r0, ins->sreg1, 7);
-                               ppc_srawi (code, ppc_r0, ppc_r0, 2);
+                               ppc_shift_right_arith_imm (code, ppc_r0, ppc_r0, 2);
                                ppc_mtctr (code, ppc_r0);
                        }
-                       ppc_lwz (code, ppc_r0, 0, ppc_sp);
+                       ppc_ldptr (code, ppc_r0, 0, ppc_sp);
                        ppc_neg (code, ppc_r11, ppc_r11);
-                       ppc_stwux (code, ppc_r0, ppc_sp, ppc_r11);
-                       
+                       ppc_stptr_update_indexed (code, ppc_r0, ppc_sp, ppc_r11);
+
+                       /* FIXME: make this loop work in 8 byte
+                          increments on PPC64 */
                        if (ins->flags & MONO_INST_INIT) {
                                /* adjust the dest reg by -4 so we can use stwu */
                                /* we actually adjust -8 because we let the loop
@@ -3387,9 +4080,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_mr (code, ppc_r3, ins->sreg1);
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                             (gpointer)"mono_arch_throw_exception");
-                       if (FORCE_INDIR_CALL || cfg->method->dynamic) {
-                               ppc_lis (code, ppc_r0, 0);
-                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                       if ((FORCE_INDIR_CALL || cfg->method->dynamic) && !cfg->compile_aot) {
+                               ppc_load_func (code, ppc_r0, 0);
                                ppc_mtlr (code, ppc_r0);
                                ppc_blrl (code);
                        } else {
@@ -3402,9 +4094,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_mr (code, ppc_r3, ins->sreg1);
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                             (gpointer)"mono_arch_rethrow_exception");
-                       if (FORCE_INDIR_CALL || cfg->method->dynamic) {
-                               ppc_lis (code, ppc_r0, 0);
-                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                       if ((FORCE_INDIR_CALL || cfg->method->dynamic) && !cfg->compile_aot) {
+                               ppc_load_func (code, ppc_r0, 0);
                                ppc_mtlr (code, ppc_r0);
                                ppc_blrl (code);
                        } else {
@@ -3418,10 +4109,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        code = emit_reserve_param_area (cfg, code);
                        ppc_mflr (code, ppc_r0);
                        if (ppc_is_imm16 (spvar->inst_offset)) {
-                               ppc_stw (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg);
+                               ppc_stptr (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg);
                        } else {
                                ppc_load (code, ppc_r11, spvar->inst_offset);
-                               ppc_stwx (code, ppc_r0, ppc_r11, spvar->inst_basereg);
+                               ppc_stptr_indexed (code, ppc_r0, ppc_r11, spvar->inst_basereg);
                        }
                        break;
                }
@@ -3432,10 +4123,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ins->sreg1 != ppc_r3)
                                ppc_mr (code, ppc_r3, ins->sreg1);
                        if (ppc_is_imm16 (spvar->inst_offset)) {
-                               ppc_lwz (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg);
+                               ppc_ldptr (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg);
                        } else {
                                ppc_load (code, ppc_r11, spvar->inst_offset);
-                               ppc_lwzx (code, ppc_r0, spvar->inst_basereg, ppc_r11);
+                               ppc_ldptr_indexed (code, ppc_r0, spvar->inst_basereg, ppc_r11);
                        }
                        ppc_mtlr (code, ppc_r0);
                        ppc_blr (code);
@@ -3445,7 +4136,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
                        g_assert (spvar->inst_basereg != ppc_sp);
                        code = emit_unreserve_param_area (cfg, code);
-                       ppc_lwz (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg);
+                       ppc_ldptr (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg);
                        ppc_mtlr (code, ppc_r0);
                        ppc_blr (code);
                        break;
@@ -3458,22 +4149,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ins->inst_c0 = code - cfg->native_code;
                        break;
                case OP_BR:
-                       if (ins->flags & MONO_INST_BRLABEL) {
-                               /*if (ins->inst_i0->inst_c0) {
-                                       ppc_b (code, 0);
-                                       //x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
-                               } else*/ {
-                                       mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
-                                       ppc_b (code, 0);
-                               }
-                       } else {
-                               /*if (ins->inst_target_bb->native_offset) {
-                                       ppc_b (code, 0);
-                                       //x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
-                               } else*/ {
-                                       mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
-                                       ppc_b (code, 0);
-                               } 
+                       /*if (ins->inst_target_bb->native_offset) {
+                               ppc_b (code, 0);
+                               //x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
+                       } else*/ {
+                               mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
+                               ppc_b (code, 0);
                        }
                        break;
                case OP_BR_REG:
@@ -3482,6 +4163,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_CEQ:
                case OP_ICEQ:
+               CASE_PPC64 (OP_LCEQ)
                        ppc_li (code, ins->dreg, 0);
                        ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 2);
                        ppc_li (code, ins->dreg, 1);
@@ -3490,6 +4172,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_CLT_UN:
                case OP_ICLT:
                case OP_ICLT_UN:
+               CASE_PPC64 (OP_LCLT)
+               CASE_PPC64 (OP_LCLT_UN)
                        ppc_li (code, ins->dreg, 1);
                        ppc_bc (code, PPC_BR_TRUE, PPC_BR_LT, 2);
                        ppc_li (code, ins->dreg, 0);
@@ -3498,6 +4182,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_CGT_UN:
                case OP_ICGT:
                case OP_ICGT_UN:
+               CASE_PPC64 (OP_LCGT)
+               CASE_PPC64 (OP_LCGT_UN)
                        ppc_li (code, ins->dreg, 1);
                        ppc_bc (code, PPC_BR_TRUE, PPC_BR_GT, 2);
                        ppc_li (code, ins->dreg, 0);
@@ -3526,21 +4212,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_COND_EXC_ILE_UN:
                        EMIT_COND_SYSTEM_EXCEPTION (ins->opcode - OP_COND_EXC_IEQ, ins->inst_p1);
                        break;
-               case OP_COND_EXC_C:
-                       /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
-                        */
-                       /*ppc_mfspr (code, ppc_r0, ppc_xer);
-                       ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
-                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
-                       break;*/
-               case OP_COND_EXC_OV:
-                       /*ppc_mcrxr (code, 0);
-                       EMIT_COND_SYSTEM_EXCEPTION (CEE_BGT - CEE_BEQ, ins->inst_p1);
-                       break;*/
-               case OP_COND_EXC_NC:
-               case OP_COND_EXC_NO:
-                       g_assert_not_reached ();
-                       break;
                case OP_IBEQ:
                case OP_IBNE_UN:
                case OP_IBLT:
@@ -3556,22 +4227,43 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
                /* floating point opcodes */
                case OP_R8CONST:
+                       g_assert (cfg->compile_aot);
+
+                       /* FIXME: Optimize this */
+                       ppc_bl (code, 1);
+                       ppc_mflr (code, ppc_r11);
+                       ppc_b (code, 3);
+                       *(double*)code = *(double*)ins->inst_p0;
+                       code += 8;
+                       ppc_lfd (code, ins->dreg, 8, ppc_r11);
+                       break;
                case OP_R4CONST:
                        g_assert_not_reached ();
+                       break;
                case OP_STORER8_MEMBASE_REG:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_stfd (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_stfdx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_stfd (code, ins->sreg1, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_stfdx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_LOADR8_MEMBASE:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lfd (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_lfdx (code, ins->dreg, ins->inst_destbasereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_lfd (code, ins->dreg, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lfdx (code, ins->dreg, ins->inst_destbasereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_STORER4_MEMBASE_REG:
@@ -3579,30 +4271,40 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_stfs (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_stfsx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_stfs (code, ins->sreg1, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_stfsx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_LOADR4_MEMBASE:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lfs (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_lfsx (code, ins->dreg, ins->inst_destbasereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_lfs (code, ins->dreg, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lfsx (code, ins->dreg, ins->inst_destbasereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_LOADR4_MEMINDEX:
-                       ppc_lfsx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lfsx (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADR8_MEMINDEX:
-                       ppc_lfdx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lfdx (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_STORER4_MEMINDEX:
                        ppc_frsp (code, ins->sreg1, ins->sreg1);
-                       ppc_stfsx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       ppc_stfsx (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
                case OP_STORER8_MEMINDEX:
-                       ppc_stfdx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       ppc_stfdx (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
                case CEE_CONV_R_UN:
                case CEE_CONV_R4: /* FIXME: change precision */
@@ -3628,17 +4330,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_FCONV_TO_U:
                        code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, FALSE);
                        break;
-               case OP_FCONV_TO_I8:
-               case OP_FCONV_TO_U8:
-                       g_assert_not_reached ();
-                       /* Implemented as helper calls */
-                       break;
                case OP_LCONV_TO_R_UN:
                        g_assert_not_reached ();
                        /* Implemented as helper calls */
                        break;
                case OP_LCONV_TO_OVF_I4_2:
                case OP_LCONV_TO_OVF_I: {
+#ifdef __mono_ppc64__
+                       NOT_IMPLEMENTED;
+#else
                        guint8 *negative_branch, *msword_positive_branch, *msword_negative_branch, *ovf_ex_target;
                        // Check if it's negative
                        ppc_cmpi (code, 0, 0, ins->sreg1, 0);
@@ -3662,6 +4362,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ins->dreg != ins->sreg1)
                                ppc_mr (code, ins->dreg, ins->sreg1);
                        break;
+#endif
                }
                case OP_SQRT:
                        ppc_fsqrtd (code, ins->dreg, ins->sreg1);
@@ -3766,17 +4467,177 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_JUMP_TABLE:
                        mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
-                       ppc_load (code, ins->dreg, 0x0f0f0f0f);
+#ifdef __mono_ppc64__
+                       ppc_load_sequence (code, ins->dreg, (guint64)0x0f0f0f0f0f0f0f0fLL);
+#else
+                       ppc_load_sequence (code, ins->dreg, (gulong)0x0f0f0f0fL);
+#endif
+                       break;
+               }
+
+#ifdef __mono_ppc64__
+               case OP_ICONV_TO_I4:
+               case OP_SEXT_I4:
+                       ppc_extsw (code, ins->dreg, ins->sreg1);
+                       break;
+               case OP_ICONV_TO_U4:
+               case OP_ZEXT_I4:
+                       ppc_clrldi (code, ins->dreg, ins->sreg1, 32);
+                       break;
+               case OP_ICONV_TO_R4:
+               case OP_ICONV_TO_R8:
+               case OP_LCONV_TO_R4:
+               case OP_LCONV_TO_R8: {
+                       int tmp;
+                       if (ins->opcode == OP_ICONV_TO_R4 || ins->opcode == OP_ICONV_TO_R8) {
+                               ppc_extsw (code, ppc_r0, ins->sreg1);
+                               tmp = ppc_r0;
+                       } else {
+                               tmp = ins->sreg1;
+                       }
+                       if (cpu_hw_caps & PPC_MOVE_FPR_GPR) {
+                               ppc_mffgpr (code, ins->dreg, tmp);
+                       } else {
+                               ppc_str (code, tmp, -8, ppc_r1);
+                               ppc_lfd (code, ins->dreg, -8, ppc_r1);
+                       }
+                       ppc_fcfid (code, ins->dreg, ins->dreg);
+                       if (ins->opcode == OP_ICONV_TO_R4 || ins->opcode == OP_LCONV_TO_R4)
+                               ppc_frsp (code, ins->dreg, ins->dreg);
+                       break;
+               }
+               case OP_LSHR:
+                       ppc_srad (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       break;
+               case OP_LSHR_UN:
+                       ppc_srd (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       break;
+               case OP_COND_EXC_C:
+                       /* check XER [0-3] (SO, OV, CA): we can't use mcrxr
+                        */
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1 << 13)); /* CA */
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, ins->inst_p1);
+                       break;
+               case OP_COND_EXC_OV:
+                       ppc_mfspr (code, ppc_r0, ppc_xer);
+                       ppc_andisd (code, ppc_r0, ppc_r0, (1 << 14)); /* OV */
+                       EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, ins->inst_p1);
+                       break;
+               case OP_LBEQ:
+               case OP_LBNE_UN:
+               case OP_LBLT:
+               case OP_LBLT_UN:
+               case OP_LBGT:
+               case OP_LBGT_UN:
+               case OP_LBGE:
+               case OP_LBGE_UN:
+               case OP_LBLE:
+               case OP_LBLE_UN:
+                       EMIT_COND_BRANCH (ins, ins->opcode - OP_LBEQ);
+                       break;
+               case OP_FCONV_TO_I8:
+                       code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 8, TRUE);
+                       break;
+               case OP_FCONV_TO_U8:
+                       code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 8, FALSE);
+                       break;
+               case OP_STOREI4_MEMBASE_REG:
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_stw (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
+                       } else {
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_stwx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                       }
+                       break;
+               case OP_STOREI4_MEMINDEX:
+                       ppc_stwx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       break;
+               case OP_ISHR_IMM:
+                       ppc_srawi (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
+                       break;
+               case OP_ISHR_UN_IMM:
+                       if (ins->inst_imm & 0x1f)
+                               ppc_srwi (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
+                       else
+                               ppc_mr (code, ins->dreg, ins->sreg1);
+                       break;
+               case OP_ATOMIC_ADD_NEW_I4:
+               case OP_ATOMIC_ADD_NEW_I8: {
+                       guint8 *loop = code, *branch;
+                       g_assert (ins->inst_offset == 0);
+                       if (ins->opcode == OP_ATOMIC_ADD_NEW_I4)
+                               ppc_lwarx (code, ppc_r0, 0, ins->inst_basereg);
+                       else
+                               ppc_ldarx (code, ppc_r0, 0, ins->inst_basereg);
+                       ppc_add (code, ppc_r0, ppc_r0, ins->sreg2);
+                       if (ins->opcode == OP_ATOMIC_ADD_NEW_I4)
+                               ppc_stwcxd (code, ppc_r0, 0, ins->inst_basereg);
+                       else
+                               ppc_stdcxd (code, ppc_r0, 0, ins->inst_basereg);
+                       branch = code;
+                       ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
+                       ppc_patch (branch, loop);
+                       ppc_mr (code, ins->dreg, ppc_r0);
+                       break;
+               }
+#else
+               case OP_ICONV_TO_R4:
+               case OP_ICONV_TO_R8: {
+                       if (cpu_hw_caps & PPC_ISA_64) {
+                               ppc_srawi(code, ppc_r0, ins->sreg1, 31);
+                               ppc_stw (code, ppc_r0, -8, ppc_r1);
+                               ppc_stw (code, ins->sreg1, -4, ppc_r1);
+                               ppc_lfd (code, ins->dreg, -8, ppc_r1);
+                               ppc_fcfid (code, ins->dreg, ins->dreg);
+                               if (ins->opcode == OP_ICONV_TO_R4)
+                                       ppc_frsp (code, ins->dreg, ins->dreg);
+                               }
+                       break;
+               }
+#endif
+               case OP_ATOMIC_CAS_I4:
+               CASE_PPC64 (OP_ATOMIC_CAS_I8) {
+                       int location = ins->sreg1;
+                       int value = ins->sreg2;
+                       int comparand = ins->sreg3;
+                       guint8 *start, *not_equal, *lost_reservation;
+
+                       start = code;
+                       if (ins->opcode == OP_ATOMIC_CAS_I4)
+                               ppc_lwarx (code, ppc_r0, 0, location);
+#ifdef __mono_ppc64__
+                       else
+                               ppc_ldarx (code, ppc_r0, 0, location);
+#endif
+                       ppc_cmp (code, 0, ins->opcode == OP_ATOMIC_CAS_I4 ? 0 : 1, ppc_r0, comparand);
+
+                       not_equal = code;
+                       ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
+                       if (ins->opcode == OP_ATOMIC_CAS_I4)
+                               ppc_stwcxd (code, value, 0, location);
+#ifdef __mono_ppc64__
+                       else
+                               ppc_stdcxd (code, value, 0, location);
+#endif
+
+                       lost_reservation = code;
+                       ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
+                       ppc_patch (lost_reservation, start);
+
+                       ppc_patch (not_equal, code);
+                       ppc_mr (code, ins->dreg, ppc_r0);
                        break;
                }
+
                default:
                        g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
                        g_assert_not_reached ();
                }
 
                if ((cfg->opt & MONO_OPT_BRANCH) && ((code - cfg->native_code - offset) > max_len)) {
-                       g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
-                                  mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
+                       g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %ld)",
+                                  mono_inst_name (ins->opcode), max_len, (glong)(code - cfg->native_code - offset));
                        g_assert_not_reached ();
                }
               
@@ -3792,28 +4653,54 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 void
 mono_arch_register_lowlevel_calls (void)
 {
+       /* The signature doesn't matter */
+       mono_register_jit_icall (mono_ppc_throw_exception, "mono_ppc_throw_exception", mono_create_icall_signature ("void"), TRUE);
 }
 
-#define patch_lis_ori(ip,val) do {\
+#ifdef __mono_ppc64__
+#define patch_load_sequence(ip,val) do {\
+               guint16 *__load = (guint16*)(ip);       \
+               g_assert (sizeof (val) == sizeof (gsize)); \
+               __load [1] = (((guint64)(gsize)(val)) >> 48) & 0xffff;  \
+               __load [3] = (((guint64)(gsize)(val)) >> 32) & 0xffff;  \
+               __load [7] = (((guint64)(gsize)(val)) >> 16) & 0xffff;  \
+               __load [9] =  ((guint64)(gsize)(val))        & 0xffff;  \
+       } while (0)
+#else
+#define patch_load_sequence(ip,val) do {\
                guint16 *__lis_ori = (guint16*)(ip);    \
-               __lis_ori [1] = (((guint32)(val)) >> 16) & 0xffff;      \
-               __lis_ori [3] = ((guint32)(val)) & 0xffff;      \
+               __lis_ori [1] = (((gulong)(val)) >> 16) & 0xffff;       \
+               __lis_ori [3] = ((gulong)(val)) & 0xffff;       \
        } while (0)
+#endif
 
 void
 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
 {
        MonoJumpInfo *patch_info;
+       gboolean compile_aot = !run_cctors;
 
        for (patch_info = ji; patch_info; patch_info = patch_info->next) {
                unsigned char *ip = patch_info->ip.i + code;
                unsigned char *target;
+               gboolean is_fd = FALSE;
 
                target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
 
+               if (compile_aot) {
+                       switch (patch_info->type) {
+                       case MONO_PATCH_INFO_BB:
+                       case MONO_PATCH_INFO_LABEL:
+                               break;
+                       default:
+                               /* No need to patch these */
+                               continue;
+                       }
+               }
+
                switch (patch_info->type) {
                case MONO_PATCH_INFO_IP:
-                       patch_lis_ori (ip, ip);
+                       patch_load_sequence (ip, ip);
                        continue;
                case MONO_PATCH_INFO_METHOD_REL:
                        g_assert_not_reached ();
@@ -3823,10 +4710,10 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
                        gpointer *table = (gpointer *)patch_info->data.table->table;
                        int i;
 
-                       patch_lis_ori (ip, table);
+                       patch_load_sequence (ip, table);
 
-                       for (i = 0; i < patch_info->data.table->table_size; i++) { 
-                               table [i] = (int)patch_info->data.table->table [i] + code;
+                       for (i = 0; i < patch_info->data.table->table_size; i++) {
+                               table [i] = (glong)patch_info->data.table->table [i] + code;
                        }
                        /* we put into the table the absolute address, no need for ppc_patch in this case */
                        continue;
@@ -3842,7 +4729,7 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
                case MONO_PATCH_INFO_TYPE_FROM_HANDLE:
                case MONO_PATCH_INFO_LDTOKEN:
                        /* from OP_AOTCONST : lis + ori */
-                       patch_lis_ori (ip, target);
+                       patch_load_sequence (ip, target);
                        continue;
                case MONO_PATCH_INFO_R4:
                case MONO_PATCH_INFO_R8:
@@ -3858,11 +4745,53 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
                case MONO_PATCH_INFO_EXC_OVF:
                        /* everything is dealt with at epilog output time */
                        continue;
+#ifdef PPC_USES_FUNCTION_DESCRIPTOR
+               case MONO_PATCH_INFO_INTERNAL_METHOD:
+               case MONO_PATCH_INFO_ABS:
+               case MONO_PATCH_INFO_CLASS_INIT:
+               case MONO_PATCH_INFO_RGCTX_FETCH:
+                       is_fd = TRUE;
+                       break;
+#endif
                default:
                        break;
                }
-               ppc_patch (ip, target);
+               ppc_patch_full (ip, target, is_fd);
+       }
+}
+
+/*
+ * Emit code to save the registers in used_int_regs or the registers in the MonoLMF
+ * structure at positive offset pos from register base_reg. pos is guaranteed to fit into
+ * the instruction offset immediate for all the registers.
+ */
+static guint8*
+save_registers (MonoCompile *cfg, guint8* code, int pos, int base_reg, gboolean save_lmf, guint32 used_int_regs, int cfa_offset)
+{
+       int i;
+       if (!save_lmf) {
+               for (i = 13; i <= 31; i++) {
+                       if (used_int_regs & (1 << i)) {
+                               ppc_str (code, i, pos, base_reg);
+                               mono_emit_unwind_op_offset (cfg, code, i, pos - cfa_offset);
+                               pos += sizeof (mgreg_t);
+                       }
+               }
+       } else {
+               /* pos is the start of the MonoLMF structure */
+               int offset = pos + G_STRUCT_OFFSET (MonoLMF, iregs);
+               for (i = 13; i <= 31; i++) {
+                       ppc_str (code, i, offset, base_reg);
+                       mono_emit_unwind_op_offset (cfg, code, i, offset - cfa_offset);
+                       offset += sizeof (mgreg_t);
+               }
+               offset = pos + G_STRUCT_OFFSET (MonoLMF, fregs);
+               for (i = 14; i < 32; i++) {
+                       ppc_stfd (code, i, offset, base_reg);
+                       offset += sizeof (gdouble);
+               }
        }
+       return code;
 }
 
 /*
@@ -3890,7 +4819,8 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        MonoBasicBlock *bb;
        MonoMethodSignature *sig;
        MonoInst *inst;
-       int alloc_size, pos, max_offset, i;
+       long alloc_size, pos, max_offset, cfa_offset;
+       int i;
        guint8 *code;
        CallInfo *cinfo;
        int tracing = 0;
@@ -3901,66 +4831,69 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                tracing = 1;
 
        sig = mono_method_signature (method);
-       cfg->code_size = 256 + sig->param_count * 20;
+       cfg->code_size = MONO_PPC_32_64_CASE (260, 384) + sig->param_count * 20;
        code = cfg->native_code = g_malloc (cfg->code_size);
 
+       cfa_offset = 0;
+
+       /* We currently emit unwind info for aot, but don't use it */
+       mono_emit_unwind_op_def_cfa (cfg, code, ppc_r1, 0);
+
        if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
                ppc_mflr (code, ppc_r0);
-               ppc_stw (code, ppc_r0, PPC_RET_ADDR_OFFSET, ppc_sp);
+               ppc_str (code, ppc_r0, PPC_RET_ADDR_OFFSET, ppc_sp);
+               mono_emit_unwind_op_offset (cfg, code, ppc_lr, PPC_RET_ADDR_OFFSET);
        }
 
        alloc_size = cfg->stack_offset;
        pos = 0;
 
        if (!method->save_lmf) {
-               /*for (i = 31; i >= 14; --i) {
-                       if (cfg->used_float_regs & (1 << i)) {
-                               pos += sizeof (gdouble);
-                               ppc_stfd (code, i, -pos, ppc_sp);
-                       }
-               }*/
                for (i = 31; i >= 13; --i) {
                        if (cfg->used_int_regs & (1 << i)) {
-                               pos += sizeof (gulong);
-                               ppc_stw (code, i, -pos, ppc_sp);
+                               pos += sizeof (mgreg_t);
                        }
                }
        } else {
-               int ofs;
                pos += sizeof (MonoLMF);
                lmf_offset = pos;
-               ofs = -pos + G_STRUCT_OFFSET(MonoLMF, iregs);
-               ppc_stmw (code, ppc_r13, ppc_r1, ofs);
-               for (i = 14; i < 32; i++) {
-                       ppc_stfd (code, i, (-pos + G_STRUCT_OFFSET(MonoLMF, fregs) + ((i-14) * sizeof (gdouble))), ppc_r1);
-               }
        }
        alloc_size += pos;
-       // align to PPC_STACK_ALIGNMENT bytes
-       if (alloc_size & (PPC_STACK_ALIGNMENT - 1)) {
-               alloc_size += PPC_STACK_ALIGNMENT - 1;
-               alloc_size &= ~(PPC_STACK_ALIGNMENT - 1);
+       // align to MONO_ARCH_FRAME_ALIGNMENT bytes
+       if (alloc_size & (MONO_ARCH_FRAME_ALIGNMENT - 1)) {
+               alloc_size += MONO_ARCH_FRAME_ALIGNMENT - 1;
+               alloc_size &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
        }
 
        cfg->stack_usage = alloc_size;
-       g_assert ((alloc_size & (PPC_STACK_ALIGNMENT-1)) == 0);
+       g_assert ((alloc_size & (MONO_ARCH_FRAME_ALIGNMENT-1)) == 0);
        if (alloc_size) {
                if (ppc_is_imm16 (-alloc_size)) {
-                       ppc_stwu (code, ppc_sp, -alloc_size, ppc_sp);
+                       ppc_str_update (code, ppc_sp, -alloc_size, ppc_sp);
+                       cfa_offset = alloc_size;
+                       mono_emit_unwind_op_def_cfa_offset (cfg, code, alloc_size);
+                       code = save_registers (cfg, code, alloc_size - pos, ppc_sp, method->save_lmf, cfg->used_int_regs, cfa_offset);
                } else {
-                       ppc_load (code, ppc_r11, -alloc_size);
-                       ppc_stwux (code, ppc_sp, ppc_sp, ppc_r11);
+                       if (pos)
+                               ppc_addi (code, ppc_r11, ppc_sp, -pos);
+                       ppc_load (code, ppc_r0, -alloc_size);
+                       ppc_str_update_indexed (code, ppc_sp, ppc_sp, ppc_r0);
+                       cfa_offset = alloc_size;
+                       mono_emit_unwind_op_def_cfa_offset (cfg, code, alloc_size);
+                       code = save_registers (cfg, code, 0, ppc_r11, method->save_lmf, cfg->used_int_regs, cfa_offset);
                }
        }
-       if (cfg->frame_reg != ppc_sp)
+       if (cfg->frame_reg != ppc_sp) {
                ppc_mr (code, cfg->frame_reg, ppc_sp);
+               mono_emit_unwind_op_def_cfa_reg (cfg, code, cfg->frame_reg);
+       }
 
        /* store runtime generic context */
        if (cfg->rgctx_var) {
                g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET &&
                                (cfg->rgctx_var->inst_basereg == ppc_r1 || cfg->rgctx_var->inst_basereg == ppc_r31));
 
-               ppc_stw (code, MONO_ARCH_RGCTX_REG, cfg->rgctx_var->inst_offset, cfg->rgctx_var->inst_basereg);
+               ppc_stptr (code, MONO_ARCH_RGCTX_REG, cfg->rgctx_var->inst_offset, cfg->rgctx_var->inst_basereg);
        }
 
         /* compute max_offset in order to use short forward jumps
@@ -3991,10 +4924,10 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                g_assert (inst);
 
                if (ppc_is_imm16 (inst->inst_offset)) {
-                       ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                       ppc_stptr (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                } else {
                        ppc_load (code, ppc_r11, inst->inst_offset);
-                       ppc_stwx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
+                       ppc_stptr_indexed (code, ainfo->reg, ppc_r11, inst->inst_basereg);
                }
        }
 
@@ -4011,13 +4944,13 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        else if (ainfo->regtype == RegTypeFP)
                                ppc_fmr (code, inst->dreg, ainfo->reg);
                        else if (ainfo->regtype == RegTypeBase) {
-                               ppc_lwz (code, ppc_r11, 0, ppc_sp);
-                               ppc_lwz (code, inst->dreg, ainfo->offset, ppc_r11);
+                               ppc_ldr (code, ppc_r11, 0, ppc_sp);
+                               ppc_ldptr (code, inst->dreg, ainfo->offset, ppc_r11);
                        } else
                                g_assert_not_reached ();
 
                        if (cfg->verbose_level > 2)
-                               g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
+                               g_print ("Argument %ld assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
                } else {
                        /* the argument should be put on the stack: FIXME handle size != word  */
                        if (ainfo->regtype == RegTypeGeneral) {
@@ -4026,75 +4959,159 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        if (ppc_is_imm16 (inst->inst_offset)) {
                                                ppc_stb (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                                        } else {
-                                               ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_stbx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_stb (code, ainfo->reg, ppc_r11, inst->inst_offset);
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_stbx (code, ainfo->reg, inst->inst_basereg, ppc_r11);
+                                               }
                                        }
                                        break;
                                case 2:
                                        if (ppc_is_imm16 (inst->inst_offset)) {
                                                ppc_sth (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       } else {
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_sth (code, ainfo->reg, ppc_r11, inst->inst_offset);
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_sthx (code, ainfo->reg, inst->inst_basereg, ppc_r11);
+                                               }
+                                       }
+                                       break;
+#ifdef __mono_ppc64__
+                               case 4:
+                                       if (ppc_is_imm16 (inst->inst_offset)) {
+                                               ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       } else {
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_stw (code, ainfo->reg, ppc_r11, inst->inst_offset);
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_stwx (code, ainfo->reg, inst->inst_basereg, ppc_r11);
+                                               }
+                                       }
+                                       break;
+                               case 8:
+                                       if (ppc_is_imm16 (inst->inst_offset)) {
+                                               ppc_str (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                                        } else {
                                                ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_sthx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
+                                               ppc_str_indexed (code, ainfo->reg, ppc_r11, inst->inst_basereg);
                                        }
                                        break;
+#else
                                case 8:
                                        if (ppc_is_imm16 (inst->inst_offset + 4)) {
                                                ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                                                ppc_stw (code, ainfo->reg + 1, inst->inst_offset + 4, inst->inst_basereg);
                                        } else {
-                                               ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_add (code, ppc_r11, ppc_r11, inst->inst_basereg);
+                                               ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                               ppc_addi (code, ppc_r11, ppc_r11, inst->inst_offset);
                                                ppc_stw (code, ainfo->reg, 0, ppc_r11);
                                                ppc_stw (code, ainfo->reg + 1, 4, ppc_r11);
                                        }
                                        break;
+#endif
                                default:
                                        if (ppc_is_imm16 (inst->inst_offset)) {
-                                               ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                               ppc_stptr (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                                        } else {
-                                               ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_stwx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_stptr (code, ainfo->reg, ppc_r11, inst->inst_offset);
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_stptr_indexed (code, ainfo->reg, inst->inst_basereg, ppc_r11);
+                                               }
                                        }
                                        break;
                                }
                        } else if (ainfo->regtype == RegTypeBase) {
+                               g_assert (ppc_is_imm16 (ainfo->offset));
                                /* load the previous stack pointer in r11 */
-                               ppc_lwz (code, ppc_r11, 0, ppc_sp);
-                               ppc_lwz (code, ppc_r0, ainfo->offset, ppc_r11);
+                               ppc_ldr (code, ppc_r11, 0, ppc_sp);
+                               ppc_ldptr (code, ppc_r0, ainfo->offset, ppc_r11);
                                switch (ainfo->size) {
                                case 1:
                                        if (ppc_is_imm16 (inst->inst_offset)) {
                                                ppc_stb (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
                                        } else {
-                                               ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_stbx (code, ppc_r0, ppc_r11, inst->inst_basereg);
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_stb (code, ppc_r0, ppc_r11, inst->inst_offset);
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_stbx (code, ppc_r0, inst->inst_basereg, ppc_r11);
+                                               }
                                        }
                                        break;
                                case 2:
                                        if (ppc_is_imm16 (inst->inst_offset)) {
                                                ppc_sth (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
+                                       } else {
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_sth (code, ppc_r0, ppc_r11, inst->inst_offset);
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_sthx (code, ppc_r0, inst->inst_basereg, ppc_r11);
+                                               }
+                                       }
+                                       break;
+#ifdef __mono_ppc64__
+                               case 4:
+                                       if (ppc_is_imm16 (inst->inst_offset)) {
+                                               ppc_stw (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
+                                       } else {
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_stw (code, ppc_r0, ppc_r11, inst->inst_offset);
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_stwx (code, ppc_r0, inst->inst_basereg, ppc_r11);
+                                               }
+                                       }
+                                       break;
+                               case 8:
+                                       if (ppc_is_imm16 (inst->inst_offset)) {
+                                               ppc_str (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
                                        } else {
                                                ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_sthx (code, ppc_r0, ppc_r11, inst->inst_basereg);
+                                               ppc_str_indexed (code, ppc_r0, ppc_r11, inst->inst_basereg);
                                        }
                                        break;
+#else
                                case 8:
+                                       g_assert (ppc_is_imm16 (ainfo->offset + 4));
                                        if (ppc_is_imm16 (inst->inst_offset + 4)) {
                                                ppc_stw (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
                                                ppc_lwz (code, ppc_r0, ainfo->offset + 4, ppc_r11);
                                                ppc_stw (code, ppc_r0, inst->inst_offset + 4, inst->inst_basereg);
                                        } else {
-                                               /* FIXME */
-                                               g_assert_not_reached ();
+                                               /* use r12 to load the 2nd half of the long before we clobber r11.  */
+                                               ppc_lwz (code, ppc_r12, ainfo->offset + 4, ppc_r11);
+                                               ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                               ppc_addi (code, ppc_r11, ppc_r11, inst->inst_offset);
+                                               ppc_stw (code, ppc_r0, 0, ppc_r11);
+                                               ppc_stw (code, ppc_r12, 4, ppc_r11);
                                        }
                                        break;
+#endif
                                default:
                                        if (ppc_is_imm16 (inst->inst_offset)) {
-                                               ppc_stw (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
+                                               ppc_stptr (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
                                        } else {
-                                               ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_stwx (code, ppc_r0, ppc_r11, inst->inst_basereg);
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_stptr (code, ppc_r0, ppc_r11, inst->inst_offset);
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_stptr_indexed (code, ppc_r0, inst->inst_basereg, ppc_r11);
+                                               }
                                        }
                                        break;
                                }
@@ -4112,11 +5129,11 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                int cur_reg;
                                int size = 0;
                                g_assert (ppc_is_imm16 (inst->inst_offset));
-                               g_assert (ppc_is_imm16 (inst->inst_offset + ainfo->size * sizeof (gpointer)));
+                               g_assert (ppc_is_imm16 (inst->inst_offset + ainfo->vtregs * sizeof (gpointer)));
                                /* FIXME: what if there is no class? */
                                if (sig->pinvoke && mono_class_from_mono_type (inst->inst_vtype))
                                        size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), NULL);
-                               for (cur_reg = 0; cur_reg < ainfo->size; ++cur_reg) {
+                               for (cur_reg = 0; cur_reg < ainfo->vtregs; ++cur_reg) {
 #if __APPLE__
                                        /*
                                         * Darwin handles 1 and 2 byte
@@ -4131,14 +5148,30 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                                ppc_stb (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
                                        else
 #endif
-                                               ppc_stw (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
+                                       {
+#ifdef __mono_ppc64__
+                                               if (ainfo->bytes) {
+                                                       g_assert (cur_reg == 0);
+                                                       ppc_sldi (code, ppc_r0, ainfo->reg,
+                                                                       (sizeof (gpointer) - ainfo->bytes) * 8);
+                                                       ppc_stptr (code, ppc_r0, doffset, inst->inst_basereg);
+                                               } else
+#endif
+                                               {
+                                                       ppc_stptr (code, ainfo->reg + cur_reg, doffset,
+                                                                       inst->inst_basereg);
+                                               }
+                                       }
                                        soffset += sizeof (gpointer);
                                        doffset += sizeof (gpointer);
                                }
                                if (ainfo->vtsize) {
+                                       /* FIXME: we need to do the shifting here, too */
+                                       if (ainfo->bytes)
+                                               NOT_IMPLEMENTED;
                                        /* load the previous stack pointer in r11 (r0 gets overwritten by the memcpy) */
-                                       ppc_lwz (code, ppc_r11, 0, ppc_sp);
-                                       if ((size & 3) != 0) {
+                                       ppc_ldr (code, ppc_r11, 0, ppc_sp);
+                                       if ((size & MONO_PPC_32_64_CASE (3, 7)) != 0) {
                                                code = emit_memcpy (code, size - soffset,
                                                        inst->inst_basereg, doffset,
                                                        ppc_r11, ainfo->offset + soffset);
@@ -4152,8 +5185,8 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                /* if it was originally a RegTypeBase */
                                if (ainfo->offset) {
                                        /* load the previous stack pointer in r11 */
-                                       ppc_lwz (code, ppc_r11, 0, ppc_sp);
-                                       ppc_lwz (code, ppc_r11, ainfo->offset, ppc_r11);
+                                       ppc_ldr (code, ppc_r11, 0, ppc_sp);
+                                       ppc_ldptr (code, ppc_r11, ainfo->offset, ppc_r11);
                                } else {
                                        ppc_mr (code, ppc_r11, ainfo->reg);
                                }
@@ -4162,7 +5195,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        MonoInst *addr = cfg->tailcall_valuetype_addrs [tailcall_struct_index];
 
                                        g_assert (ppc_is_imm16 (addr->inst_offset));
-                                       ppc_stw (code, ppc_r11, addr->inst_offset, addr->inst_basereg);
+                                       ppc_stptr (code, ppc_r11, addr->inst_offset, addr->inst_basereg);
 
                                        tailcall_struct_index++;
                                }
@@ -4177,11 +5210,14 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        }
 
        if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
-               ppc_load (code, ppc_r3, cfg->domain);
+               if (cfg->compile_aot)
+                       /* AOT code is only used in the root domain */
+                       ppc_load_ptr (code, ppc_r3, 0);
+               else
+                       ppc_load_ptr (code, ppc_r3, cfg->domain);
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
-               if (FORCE_INDIR_CALL || cfg->method->dynamic) {
-                       ppc_lis (code, ppc_r0, 0);
-                       ppc_ori (code, ppc_r0, ppc_r0, 0);
+               if ((FORCE_INDIR_CALL || cfg->method->dynamic) && !cfg->compile_aot) {
+                       ppc_load_func (code, ppc_r0, 0);
                        ppc_mtlr (code, ppc_r0);
                        ppc_blrl (code);
                } else {
@@ -4192,14 +5228,17 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        if (method->save_lmf) {
                if (lmf_pthread_key != -1) {
                        emit_tls_access (code, ppc_r3, lmf_pthread_key);
-                       if (G_STRUCT_OFFSET (MonoJitTlsData, lmf))
+                       if (tls_mode != TLS_MODE_NPTL && G_STRUCT_OFFSET (MonoJitTlsData, lmf))
                                ppc_addi (code, ppc_r3, ppc_r3, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
                } else {
+                       if (cfg->compile_aot) {
+                               /* Compute the got address which is needed by the PLT entry */
+                               code = mono_arch_emit_load_got_addr (cfg->native_code, code, cfg, NULL);
+                       }
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                     (gpointer)"mono_get_lmf_addr");
-                       if (FORCE_INDIR_CALL || cfg->method->dynamic) {
-                               ppc_lis (code, ppc_r0, 0);
-                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                       if ((FORCE_INDIR_CALL || cfg->method->dynamic) && !cfg->compile_aot) {
+                               ppc_load_func (code, ppc_r0, 0);
                                ppc_mtlr (code, ppc_r0);
                                ppc_blrl (code);
                        } else {
@@ -4215,27 +5254,40 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                 */
                ppc_addi (code, ppc_r11, ppc_sp, alloc_size - lmf_offset);
                /* ppc_r3 is the result from mono_get_lmf_addr () */
-               ppc_stw (code, ppc_r3, G_STRUCT_OFFSET(MonoLMF, lmf_addr), ppc_r11);
+               ppc_stptr (code, ppc_r3, G_STRUCT_OFFSET(MonoLMF, lmf_addr), ppc_r11);
                /* new_lmf->previous_lmf = *lmf_addr */
-               ppc_lwz (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r3);
-               ppc_stw (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r11);
+               ppc_ldptr (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r3);
+               ppc_stptr (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r11);
                /* *(lmf_addr) = r11 */
-               ppc_stw (code, ppc_r11, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r3);
+               ppc_stptr (code, ppc_r11, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r3);
                /* save method info */
-               ppc_load (code, ppc_r0, method);
-               ppc_stw (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, method), ppc_r11);
-               ppc_stw (code, ppc_sp, G_STRUCT_OFFSET(MonoLMF, ebp), ppc_r11);
+               if (cfg->compile_aot)
+                       // FIXME: under AOT, 0 is stored in the MonoLMF method field instead of the method pointer
+                       ppc_load (code, ppc_r0, 0);
+               else
+                       ppc_load_ptr (code, ppc_r0, method);
+               ppc_stptr (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, method), ppc_r11);
+               ppc_stptr (code, ppc_sp, G_STRUCT_OFFSET(MonoLMF, ebp), ppc_r11);
                /* save the current IP */
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
-               ppc_load (code, ppc_r0, 0x01010101);
-               ppc_stw (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, eip), ppc_r11);
+               if (cfg->compile_aot) {
+                       ppc_bl (code, 1);
+                       ppc_mflr (code, ppc_r0);
+               } else {
+                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
+#ifdef __mono_ppc64__
+                       ppc_load_sequence (code, ppc_r0, (guint64)0x0101010101010101LL);
+#else
+                       ppc_load_sequence (code, ppc_r0, (gulong)0x01010101L);
+#endif
+               }
+               ppc_stptr (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, eip), ppc_r11);
        }
 
        if (tracing)
                code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
 
        cfg->code_len = code - cfg->native_code;
-       g_assert (cfg->code_len < cfg->code_size);
+       g_assert (cfg->code_len <= cfg->code_size);
        g_free (cinfo);
 
        return code;
@@ -4282,16 +5334,16 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                ppc_mr (code, ppc_r8, cfg->frame_reg);
                ppc_addi (code, ppc_r11, cfg->frame_reg, cfg->stack_usage - lmf_offset);
                /* r5 = previous_lmf */
-               ppc_lwz (code, ppc_r5, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r11);
+               ppc_ldptr (code, ppc_r5, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r11);
                /* r6 = lmf_addr */
-               ppc_lwz (code, ppc_r6, G_STRUCT_OFFSET(MonoLMF, lmf_addr), ppc_r11);
+               ppc_ldptr (code, ppc_r6, G_STRUCT_OFFSET(MonoLMF, lmf_addr), ppc_r11);
                /* *(lmf_addr) = previous_lmf */
-               ppc_stw (code, ppc_r5, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r6);
+               ppc_stptr (code, ppc_r5, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r6);
                /* FIXME: speedup: there is no actual need to restore the registers if
                 * we didn't actually change them (idea from Zoltan).
                 */
                /* restore iregs */
-               ppc_lmw (code, ppc_r13, ppc_r11, G_STRUCT_OFFSET(MonoLMF, iregs));
+               ppc_ldr_multiple (code, ppc_r13, G_STRUCT_OFFSET(MonoLMF, iregs), ppc_r11);
                /* restore fregs */
                /*for (i = 14; i < 32; i++) {
                        ppc_lfd (code, i, G_STRUCT_OFFSET(MonoLMF, fregs) + ((i-14) * sizeof (gdouble)), ppc_r11);
@@ -4299,39 +5351,56 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                g_assert (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET));
                /* use the saved copy of the frame reg in r8 */
                if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
-                       ppc_lwz (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, ppc_r8);
+                       ppc_ldr (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, ppc_r8);
                        ppc_mtlr (code, ppc_r0);
                }
                ppc_addic (code, ppc_sp, ppc_r8, cfg->stack_usage);
        } else {
                if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
-                       if (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET)) {
-                               ppc_lwz (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, cfg->frame_reg);
+                       long return_offset = cfg->stack_usage + PPC_RET_ADDR_OFFSET;
+                       if (ppc_is_imm16 (return_offset)) {
+                               ppc_ldr (code, ppc_r0, return_offset, cfg->frame_reg);
                        } else {
-                               ppc_load (code, ppc_r11, cfg->stack_usage + PPC_RET_ADDR_OFFSET);
-                               ppc_lwzx (code, ppc_r0, cfg->frame_reg, ppc_r11);
+                               ppc_load (code, ppc_r11, return_offset);
+                               ppc_ldr_indexed (code, ppc_r0, cfg->frame_reg, ppc_r11);
                        }
                        ppc_mtlr (code, ppc_r0);
                }
                if (ppc_is_imm16 (cfg->stack_usage)) {
-                       ppc_addic (code, ppc_sp, cfg->frame_reg, cfg->stack_usage);
-               } else {
-                       ppc_load (code, ppc_r11, cfg->stack_usage);
-                       ppc_add (code, ppc_sp, cfg->frame_reg, ppc_r11);
-               }
-
-               /*for (i = 31; i >= 14; --i) {
-                       if (cfg->used_float_regs & (1 << i)) {
-                               pos += sizeof (double);
-                               ppc_lfd (code, i, -pos, ppc_sp);
+                       int offset = cfg->stack_usage;
+                       for (i = 13; i <= 31; i++) {
+                               if (cfg->used_int_regs & (1 << i))
+                                       offset -= sizeof (mgreg_t);
                        }
-               }*/
-               for (i = 31; i >= 13; --i) {
-                       if (cfg->used_int_regs & (1 << i)) {
-                               pos += sizeof (gulong);
-                               ppc_lwz (code, i, -pos, ppc_sp);
+                       if (cfg->frame_reg != ppc_sp)
+                               ppc_mr (code, ppc_r11, cfg->frame_reg);
+                       /* note r31 (possibly the frame register) is restored last */
+                       for (i = 13; i <= 31; i++) {
+                               if (cfg->used_int_regs & (1 << i)) {
+                                       ppc_ldr (code, i, offset, cfg->frame_reg);
+                                       offset += sizeof (mgreg_t);
+                               }
+                       }
+                       if (cfg->frame_reg != ppc_sp)
+                               ppc_addi (code, ppc_sp, ppc_r11, cfg->stack_usage);
+                       else
+                               ppc_addi (code, ppc_sp, ppc_sp, cfg->stack_usage);
+               } else {
+                       ppc_load32 (code, ppc_r11, cfg->stack_usage);
+                       if (cfg->used_int_regs) {
+                               ppc_add (code, ppc_r11, cfg->frame_reg, ppc_r11);
+                               for (i = 31; i >= 13; --i) {
+                                       if (cfg->used_int_regs & (1 << i)) {
+                                               pos += sizeof (mgreg_t);
+                                               ppc_ldr (code, i, -pos, ppc_r11);
+                                       }
+                               }
+                               ppc_mr (code, ppc_sp, ppc_r11);
+                       } else {
+                               ppc_add (code, ppc_sp, cfg->frame_reg, ppc_r11);
                        }
                }
+
        }
        ppc_blr (code);
 
@@ -4377,13 +5446,12 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
      
        /* 
         * make sure we have enough space for exceptions
-        * 24 is the simulated call to throw_exception_by_name
         */
        for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
                if (patch_info->type == MONO_PATCH_INFO_EXC) {
                        i = exception_id_by_name (patch_info->data.target);
                        if (!exc_throw_found [i]) {
-                               max_epilog_size += 24;
+                               max_epilog_size += (2 * PPC_LOAD_SEQUENCE_LENGTH) + 5 * 4;
                                exc_throw_found [i] = TRUE;
                        }
                } else if (patch_info->type == MONO_PATCH_INFO_BB_OVF)
@@ -4392,7 +5460,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
                        MonoOvfJump *ovfj = (MonoOvfJump*)patch_info->data.target;
                        i = exception_id_by_name (ovfj->data.exception);
                        if (!exc_throw_found [i]) {
-                               max_epilog_size += 24;
+                               max_epilog_size += (2 * PPC_LOAD_SEQUENCE_LENGTH) + 5 * 4;
                                exc_throw_found [i] = TRUE;
                        }
                        max_epilog_size += 8;
@@ -4422,6 +5490,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
                        ppc_b (code, 0);
                        ip = ovfj->data.bb->native_offset + cfg->native_code;
                        ppc_patch (code - 4, ip);
+                       patch_info->type = MONO_PATCH_INFO_NONE;
                        break;
                }
                case MONO_PATCH_INFO_EXC_OVF: {
@@ -4442,9 +5511,12 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
                        newji->data.target = ovfj->data.exception;
                        newji->next = patch_info->next;
                        patch_info->next = newji;
+                       patch_info->type = MONO_PATCH_INFO_NONE;
                        break;
                }
                case MONO_PATCH_INFO_EXC: {
+                       MonoClass *exc_class;
+
                        unsigned char *ip = patch_info->ip.i + cfg->native_code;
                        i = exception_id_by_name (patch_info->data.target);
                        if (exc_throw_pos [i]) {
@@ -4454,20 +5526,24 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
                        } else {
                                exc_throw_pos [i] = code;
                        }
+
+                       exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
+                       g_assert (exc_class);
+
                        ppc_patch (ip, code);
                        /*mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);*/
-                       ppc_load (code, ppc_r3, patch_info->data.target);
-                       /* we got here from a conditional call, so the calling ip is set in lr already */
+                       ppc_load (code, ppc_r3, exc_class->type_token);
+                       /* we got here from a conditional call, so the calling ip is set in lr */
+                       ppc_mflr (code, ppc_r4);
                        patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
-                       patch_info->data.name = "mono_arch_throw_exception_by_name";
+                       patch_info->data.name = "mono_arch_throw_corlib_exception";
                        patch_info->ip.i = code - cfg->native_code;
                        if (FORCE_INDIR_CALL || cfg->method->dynamic) {
-                               ppc_lis (code, ppc_r0, 0);
-                               ppc_ori (code, ppc_r0, ppc_r0, 0);
+                               ppc_load_func (code, ppc_r0, 0);
                                ppc_mtctr (code, ppc_r0);
                                ppc_bcctr (code, PPC_BR_ALWAYS, 0);
                        } else {
-                               ppc_b (code, 0);
+                               ppc_bl (code, 0);
                        }
                        break;
                }
@@ -4479,10 +5555,10 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
 
        cfg->code_len = code - cfg->native_code;
 
-       g_assert (cfg->code_len < cfg->code_size);
-
+       g_assert (cfg->code_len <= cfg->code_size);
 }
 
+#if DEAD_CODE
 static int
 try_offset_access (void *value, guint32 idx)
 {
@@ -4496,22 +5572,41 @@ try_offset_access (void *value, guint32 idx)
                return 0;
        return 1;
 }
+#endif
 
 static void
 setup_tls_access (void)
 {
        guint32 ptk;
+
+#if defined(__linux__) && defined(_CS_GNU_LIBPTHREAD_VERSION)
+       size_t conf_size = 0;
+       char confbuf[128];
+#else
+       /* FIXME for darwin */
        guint32 *ins, *code;
        guint32 cmplwi_1023, li_0x48, blr_ins;
+#endif
+
+#ifdef TARGET_PS3
+       tls_mode = TLS_MODE_FAILED;
+#endif
+
        if (tls_mode == TLS_MODE_FAILED)
                return;
-
        if (g_getenv ("MONO_NO_TLS")) {
                tls_mode = TLS_MODE_FAILED;
                return;
        }
 
-       if (tls_mode == TLS_MODE_DETECT) {
+       if (tls_mode == TLS_MODE_DETECT) {
+#if defined(__APPLE__) && defined(__mono_ppc__) && !defined(__mono_ppc64__)
+               tls_mode = TLS_MODE_DARWIN_G4;
+#elif defined(__linux__) && defined(_CS_GNU_LIBPTHREAD_VERSION)
+               conf_size = confstr ( _CS_GNU_LIBPTHREAD_VERSION, confbuf, sizeof(confbuf));
+               if ((conf_size > 4) && (strncmp (confbuf, "NPTL", 4) == 0))
+                       tls_mode = TLS_MODE_NPTL;
+#elif !defined(TARGET_PS3)
                ins = (guint32*)pthread_getspecific;
                /* uncond branch to the real method */
                if ((*ins >> 26) == 18) {
@@ -4520,7 +5615,7 @@ setup_tls_access (void)
                        val >>= 6;
                        if (*ins & 2) {
                                /* absolute */
-                               ins = (guint32*)val;
+                               ins = (guint32*)(long)val;
                        } else {
                                ins = (guint32*) ((char*)ins + val);
                        }
@@ -4556,7 +5651,7 @@ setup_tls_access (void)
                                val >>= 6;
                                if (*ins & 2) {
                                        /* absolute */
-                                       ins = (guint32*)val;
+                                       ins = (guint32*)(long)val;
                                } else {
                                        ins = (guint32*) ((char*)ins + val);
                                }
@@ -4583,7 +5678,18 @@ setup_tls_access (void)
                        tls_mode = TLS_MODE_FAILED;
                        return;
                }
+#endif
        }
+#ifndef TARGET_PS3
+       if (tls_mode == TLS_MODE_DETECT)
+               tls_mode = TLS_MODE_FAILED;
+       if (tls_mode == TLS_MODE_FAILED)
+               return;
+       if ((monodomain_key == -1) && (tls_mode == TLS_MODE_NPTL)) {
+               monodomain_key = mono_domain_get_tls_offset();
+       }
+       /* if not TLS_MODE_NPTL or local dynamic (as indicated by
+          mono_domain_get_tls_offset returning -1) then use keyed access. */
        if (monodomain_key == -1) {
                ptk = mono_domain_get_tls_key ();
                if (ptk < 1024) {
@@ -4593,6 +5699,12 @@ setup_tls_access (void)
                        }
                }
        }
+
+       if ((lmf_pthread_key == -1) && (tls_mode == TLS_MODE_NPTL)) {
+               lmf_pthread_key = mono_get_lmf_addr_tls_offset();
+       }
+       /* if not TLS_MODE_NPTL or local dynamic (as indicated by
+          mono_get_lmf_addr_tls_offset returning -1) then use keyed access. */
        if (lmf_pthread_key == -1) {
                ptk = mono_pthread_key_for_tls (mono_jit_tls_id);
                if (ptk < 1024) {
@@ -4604,18 +5716,7 @@ setup_tls_access (void)
                        lmf_pthread_key = ptk;
                }
        }
-       if (monothread_key == -1) {
-               ptk = mono_thread_get_tls_key ();
-               if (ptk < 1024) {
-                       ptk = mono_pthread_key_for_tls (ptk);
-                       if (ptk < 1024) {
-                               monothread_key = ptk;
-                               /*g_print ("thread inited: %d\n", ptk);*/
-                       }
-               } else {
-                       /*g_print ("thread not inited yet %d\n", ptk);*/
-               }
-       }
+#endif
 }
 
 void
@@ -4631,10 +5732,11 @@ mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
 
 #ifdef MONO_ARCH_HAVE_IMT
 
-#define CMP_SIZE 12
+#define CMP_SIZE (PPC_LOAD_SEQUENCE_LENGTH + 4)
 #define BR_SIZE 4
+#define LOADSTORE_SIZE 4
 #define JUMP_IMM_SIZE 12
-#define JUMP_IMM32_SIZE 16
+#define JUMP_IMM32_SIZE (PPC_LOAD_SEQUENCE_LENGTH + 8)
 #define ENABLE_WRONG_METHOD_CHECK 0
 
 /*
@@ -4654,15 +5756,17 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                        if (item->check_target_idx) {
                                if (!item->compare_done)
                                        item->chunk_size += CMP_SIZE;
-                               if (fail_tramp)
+                               if (item->has_target_code)
                                        item->chunk_size += BR_SIZE + JUMP_IMM32_SIZE;
                                else
-                                       item->chunk_size += BR_SIZE + JUMP_IMM_SIZE;
+                                       item->chunk_size += LOADSTORE_SIZE + BR_SIZE + JUMP_IMM_SIZE;
                        } else {
                                if (fail_tramp) {
                                        item->chunk_size += CMP_SIZE + BR_SIZE + JUMP_IMM32_SIZE * 2;
+                                       if (!item->has_target_code)
+                                               item->chunk_size += LOADSTORE_SIZE;
                                } else {
-                                       item->chunk_size += JUMP_IMM_SIZE;
+                                       item->chunk_size += LOADSTORE_SIZE + JUMP_IMM_SIZE;
 #if ENABLE_WRONG_METHOD_CHECK
                                        item->chunk_size += CMP_SIZE + BR_SIZE + 4;
 #endif
@@ -4678,40 +5782,58 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                code = mono_method_alloc_generic_virtual_thunk (domain, size);
        } else {
                /* the initial load of the vtable address */
-               size += 8;
-               code = mono_code_manager_reserve (domain->code_mp, size);
+               size += PPC_LOAD_SEQUENCE_LENGTH + LOADSTORE_SIZE;
+               code = mono_domain_code_reserve (domain, size);
        }
        start = code;
-       if (!fail_tramp)
-               ppc_load (code, ppc_r11, (guint32)(& (vtable->vtable [0])));
+       if (!fail_tramp) {
+               /*
+                * We need to save and restore r11 because it might be
+                * used by the caller as the vtable register, so
+                * clobbering it will trip up the magic trampoline.
+                *
+                * FIXME: Get rid of this by making sure that r11 is
+                * not used as the vtable register in interface calls.
+                */
+               ppc_stptr (code, ppc_r11, PPC_RET_ADDR_OFFSET, ppc_sp);
+               ppc_load (code, ppc_r11, (gsize)(& (vtable->vtable [0])));
+       }
        for (i = 0; i < count; ++i) {
                MonoIMTCheckItem *item = imt_entries [i];
                item->code_target = code;
                if (item->is_equals) {
                        if (item->check_target_idx) {
                                if (!item->compare_done) {
-                                       ppc_load (code, ppc_r0, (guint32)item->key);
-                                       ppc_cmpl (code, 0, 0, MONO_ARCH_IMT_REG, ppc_r0);
+                                       ppc_load (code, ppc_r0, (gsize)item->key);
+                                       ppc_compare_log (code, 0, MONO_ARCH_IMT_REG, ppc_r0);
                                }
                                item->jmp_code = code;
                                ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
-                               if (fail_tramp)
-                                       ppc_load (code, ppc_r0, item->value.target_code);
-                               else
-                                       ppc_lwz (code, ppc_r0, (sizeof (gpointer) * item->value.vtable_slot), ppc_r11);
+                               if (item->has_target_code) {
+                                       ppc_load_ptr (code, ppc_r0, item->value.target_code);
+                               } else {
+                                       ppc_ldptr (code, ppc_r0, (sizeof (gpointer) * item->value.vtable_slot), ppc_r11);
+                                       ppc_ldptr (code, ppc_r11, PPC_RET_ADDR_OFFSET, ppc_sp);
+                               }
                                ppc_mtctr (code, ppc_r0);
                                ppc_bcctr (code, PPC_BR_ALWAYS, 0);
                        } else {
                                if (fail_tramp) {
-                                       ppc_load (code, ppc_r0, (guint32)item->key);
-                                       ppc_cmpl (code, 0, 0, MONO_ARCH_IMT_REG, ppc_r0);
+                                       ppc_load (code, ppc_r0, (gulong)item->key);
+                                       ppc_compare_log (code, 0, MONO_ARCH_IMT_REG, ppc_r0);
                                        item->jmp_code = code;
                                        ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
-                                       ppc_load (code, ppc_r0, item->value.target_code);
+                                       if (item->has_target_code) {
+                                               ppc_load_ptr (code, ppc_r0, item->value.target_code);
+                                       } else {
+                                               g_assert (vtable);
+                                               ppc_load_ptr (code, ppc_r0, & (vtable->vtable [item->value.vtable_slot]));
+                                               ppc_ldptr_indexed (code, ppc_r0, 0, ppc_r0);
+                                       }
                                        ppc_mtctr (code, ppc_r0);
                                        ppc_bcctr (code, PPC_BR_ALWAYS, 0);
                                        ppc_patch (item->jmp_code, code);
-                                       ppc_load (code, ppc_r0, fail_tramp);
+                                       ppc_load_ptr (code, ppc_r0, fail_tramp);
                                        ppc_mtctr (code, ppc_r0);
                                        ppc_bcctr (code, PPC_BR_ALWAYS, 0);
                                        item->jmp_code = NULL;
@@ -4719,11 +5841,12 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                                        /* enable the commented code to assert on wrong method */
 #if ENABLE_WRONG_METHOD_CHECK
                                        ppc_load (code, ppc_r0, (guint32)item->key);
-                                       ppc_cmpl (code, 0, 0, MONO_ARCH_IMT_REG, ppc_r0);
+                                       ppc_compare_log (code, 0, MONO_ARCH_IMT_REG, ppc_r0);
                                        item->jmp_code = code;
                                        ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
 #endif
-                                       ppc_lwz (code, ppc_r0, (sizeof (gpointer) * item->value.vtable_slot), ppc_r11);
+                                       ppc_ldptr (code, ppc_r0, (sizeof (gpointer) * item->value.vtable_slot), ppc_r11);
+                                       ppc_ldptr (code, ppc_r11, PPC_RET_ADDR_OFFSET, ppc_sp);
                                        ppc_mtctr (code, ppc_r0);
                                        ppc_bcctr (code, PPC_BR_ALWAYS, 0);
 #if ENABLE_WRONG_METHOD_CHECK
@@ -4734,8 +5857,8 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                                }
                        }
                } else {
-                       ppc_load (code, ppc_r0, (guint32)item->key);
-                       ppc_cmpl (code, 0, 0, MONO_ARCH_IMT_REG, ppc_r0);
+                       ppc_load (code, ppc_r0, (gulong)item->key);
+                       ppc_compare_log (code, 0, MONO_ARCH_IMT_REG, ppc_r0);
                        item->jmp_code = code;
                        ppc_bc (code, PPC_BR_FALSE, PPC_BR_LT, 0);
                }
@@ -4758,22 +5881,20 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
 }
 
 MonoMethod*
-mono_arch_find_imt_method (gpointer *regs, guint8 *code)
+mono_arch_find_imt_method (mgreg_t *regs, guint8 *code)
 {
-       return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
-}
+       mgreg_t *r = (mgreg_t*)regs;
 
-MonoObject*
-mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSharingContext *gsctx)
-{
-       return mono_arch_get_this_arg_from_call (gsctx, mono_method_signature (method), (gssize*)regs, NULL);
+       return (MonoMethod*)(gsize) r [MONO_ARCH_IMT_REG];
 }
 #endif
 
 MonoVTable*
-mono_arch_find_static_call_vtable (gpointer *regs, guint8 *code)
+mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code)
 {
-       return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
+       mgreg_t *r = (mgreg_t*)regs;
+
+       return (MonoVTable*)(gsize) r [MONO_ARCH_RGCTX_REG];
 }
 
 MonoInst*
@@ -4802,24 +5923,238 @@ MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
        return ins;
 }
 
-MonoInst* 
-mono_arch_get_thread_intrinsic (MonoCompile* cfg)
+gpointer
+mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
 {
-       MonoInst* ins;
+       if (reg == ppc_r1)
+               return MONO_CONTEXT_GET_SP (ctx);
 
-       setup_tls_access ();
-       if (monothread_key == -1)
-               return NULL;
+       g_assert (reg >= ppc_r13);
+
+       return (gpointer)(gsize)ctx->regs [reg - ppc_r13];
+}
+
+guint32
+mono_arch_get_patch_offset (guint8 *code)
+{
+       return 0;
+}
+
+/*
+ * mono_arch_emit_load_got_addr:
+ *
+ *   Emit code to load the got address.
+ * On PPC, the result is placed into r30.
+ */
+guint8*
+mono_arch_emit_load_got_addr (guint8 *start, guint8 *code, MonoCompile *cfg, MonoJumpInfo **ji)
+{
+       ppc_bl (code, 1);
+       ppc_mflr (code, ppc_r30);
+       if (cfg)
+               mono_add_patch_info (cfg, code - start, MONO_PATCH_INFO_GOT_OFFSET, NULL);
+       else
+               *ji = mono_patch_info_list_prepend (*ji, code - start, MONO_PATCH_INFO_GOT_OFFSET, NULL);
+       /* arch_emit_got_address () patches this */
+#if defined(TARGET_POWERPC64)
+       ppc_nop (code);
+       ppc_nop (code);
+       ppc_nop (code);
+       ppc_nop (code);
+#else
+       ppc_load32 (code, ppc_r0, 0);
+       ppc_add (code, ppc_r30, ppc_r30, ppc_r0);
+#endif
+
+       return code;
+}
+
+/*
+ * mono_arch_emit_load_aotconst:
+ *
+ *   Emit code to load the contents of the GOT slot identified by TRAMP_TYPE and
+ * TARGET from the mscorlib GOT in full-aot code.
+ * On PPC, the GOT address is assumed to be in r30, and the result is placed into 
+ * r11.
+ */
+guint8*
+mono_arch_emit_load_aotconst (guint8 *start, guint8 *code, MonoJumpInfo **ji, int tramp_type, gconstpointer target)
+{
+       /* Load the mscorlib got address */
+       ppc_ldptr (code, ppc_r11, sizeof (gpointer), ppc_r30);
+       *ji = mono_patch_info_list_prepend (*ji, code - start, tramp_type, target);
+       /* arch_emit_got_access () patches this */
+       ppc_load32 (code, ppc_r0, 0);
+       ppc_ldptr_indexed (code, ppc_r11, ppc_r11, ppc_r0);
+
+       return code;
+}
+
+/* Soft Debug support */
+#ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED
+
+/*
+ * BREAKPOINTS
+ */
+
+/*
+ * mono_arch_set_breakpoint:
+ *
+ *   See mini-amd64.c for docs.
+ */
+void
+mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
+{
+       guint8 *code = ip;
+       guint8 *orig_code = code;
+
+       ppc_load_sequence (code, ppc_r11, (gsize)bp_trigger_page);
+       ppc_ldptr (code, ppc_r11, 0, ppc_r11);
+
+       g_assert (code - orig_code == BREAKPOINT_SIZE);
+
+       mono_arch_flush_icache (orig_code, code - orig_code);
+}
+
+/*
+ * mono_arch_clear_breakpoint:
+ *
+ *   See mini-amd64.c for docs.
+ */
+void
+mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
+{
+       guint8 *code = ip;
+       int i;
+
+       for (i = 0; i < BREAKPOINT_SIZE / 4; ++i)
+               ppc_nop (code);
+
+       mono_arch_flush_icache (ip, code - ip);
+}
+
+/*
+ * mono_arch_is_breakpoint_event:
+ *
+ *   See mini-amd64.c for docs.
+ */
+gboolean
+mono_arch_is_breakpoint_event (void *info, void *sigctx)
+{
+       siginfo_t* sinfo = (siginfo_t*) info;
+       /* Sometimes the address is off by 4 */
+       if (sinfo->si_addr >= bp_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)bp_trigger_page + 128)
+               return TRUE;
+       else
+               return FALSE;
+}
+
+/*
+ * mono_arch_get_ip_for_breakpoint:
+ *
+ *   See mini-amd64.c for docs.
+ */
+guint8*
+mono_arch_get_ip_for_breakpoint (MonoJitInfo *ji, MonoContext *ctx)
+{
+       guint8 *ip = MONO_CONTEXT_GET_IP (ctx);
+
+       /* ip points at the ldptr instruction */
+       ip -= PPC_LOAD_SEQUENCE_LENGTH;
+
+       return ip;
+}
+
+/*
+ * mono_arch_skip_breakpoint:
+ *
+ *   See mini-amd64.c for docs.
+ */
+void
+mono_arch_skip_breakpoint (MonoContext *ctx)
+{
+       /* skip the ldptr */
+       MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + 4);
+}
+
+/*
+ * SINGLE STEPPING
+ */
        
-       MONO_INST_NEW (cfg, ins, OP_TLS_GET);
-       ins->inst_offset = monothread_key;
-       return ins;
+/*
+ * mono_arch_start_single_stepping:
+ *
+ *   See mini-amd64.c for docs.
+ */
+void
+mono_arch_start_single_stepping (void)
+{
+       mono_mprotect (ss_trigger_page, mono_pagesize (), 0);
+}
+       
+/*
+ * mono_arch_stop_single_stepping:
+ *
+ *   See mini-amd64.c for docs.
+ */
+void
+mono_arch_stop_single_stepping (void)
+{
+       mono_mprotect (ss_trigger_page, mono_pagesize (), MONO_MMAP_READ);
 }
 
-gpointer
-mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
+/*
+ * mono_arch_is_single_step_event:
+ *
+ *   See mini-amd64.c for docs.
+ */
+gboolean
+mono_arch_is_single_step_event (void *info, void *sigctx)
 {
-       g_assert (reg >= ppc_r13);
+       siginfo_t* sinfo = (siginfo_t*) info;
+       /* Sometimes the address is off by 4 */
+       if (sinfo->si_addr >= ss_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)ss_trigger_page + 128)
+               return TRUE;
+       else
+               return FALSE;
+}
 
-       return (gpointer)ctx->regs [reg - ppc_r13];
+/*
+ * mono_arch_get_ip_for_single_step:
+ *
+ *   See mini-amd64.c for docs.
+ */
+guint8*
+mono_arch_get_ip_for_single_step (MonoJitInfo *ji, MonoContext *ctx)
+{
+       guint8 *ip = MONO_CONTEXT_GET_IP (ctx);
+
+       /* ip points after the ldptr instruction */
+       return ip;
+}
+
+/*
+ * mono_arch_skip_single_step:
+ *
+ *   See mini-amd64.c for docs.
+ */
+void
+mono_arch_skip_single_step (MonoContext *ctx)
+{
+       /* skip the ldptr */
+       MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + 4);
 }
+
+/*
+ * mono_arch_get_seq_point_info:
+ *
+ *   See mini-amd64.c for docs.
+ */
+gpointer
+mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
+{
+       NOT_IMPLEMENTED;
+       return NULL;
+}
+
+#endif