Mon Mar 8 17:58:26 CET 2010 Paolo Molaro <lupus@ximian.com>
[mono.git] / mono / mini / mini-ppc.c
old mode 100644 (file)
new mode 100755 (executable)
index 9046264..a23e078
@@ -14,6 +14,8 @@
 
 #include <mono/metadata/appdomain.h>
 #include <mono/metadata/debug-helpers.h>
+#include <mono/utils/mono-proclib.h>
+#include <mono/utils/mono-mmap.h>
 
 #include "mini-ppc.h"
 #ifdef TARGET_POWERPC64
@@ -41,6 +43,22 @@ enum {
        TLS_MODE_DARWIN_G5
 };
 
+/* cpu_hw_caps contains the flags defined below */
+static int cpu_hw_caps = 0;
+static int cachelinesize = 0;
+static int cachelineinc = 0;
+enum {
+       PPC_ICACHE_SNOOP      = 1 << 0,
+       PPC_MULTIPLE_LS_UNITS = 1 << 1,
+       PPC_SMP_CAPABLE       = 1 << 2,
+       PPC_ISA_2X            = 1 << 3,
+       PPC_ISA_64            = 1 << 4,
+       PPC_MOVE_FPR_GPR      = 1 << 5,
+       PPC_HW_CAP_END
+};
+
+#define BREAKPOINT_SIZE (PPC_LOAD_SEQUENCE_LENGTH + 4)
+
 /* This mutex protects architecture specific caches */
 #define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
 #define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
@@ -49,9 +67,17 @@ static CRITICAL_SECTION mini_arch_mutex;
 int mono_exc_esp_offset = 0;
 static int tls_mode = TLS_MODE_DETECT;
 static int lmf_pthread_key = -1;
-static int monothread_key = -1;
 static int monodomain_key = -1;
 
+/*
+ * The code generated for sequence points reads from this location, which is
+ * made read-only when single stepping is enabled.
+ */
+static gpointer ss_trigger_page;
+
+/* Enabled breakpoints read from this trigger page */
+static gpointer bp_trigger_page;
+
 static int
 offsets_from_pthread_key (guint32 key, int *offset2)
 {
@@ -159,7 +185,7 @@ emit_memcpy (guint8 *code, int size, int dreg, int doffset, int sreg, int soffse
 {
        /* unrolled, use the counter in big */
        if (size > sizeof (gpointer) * 5) {
-               long shifted = size >> MONO_PPC_32_64_CASE (2, 3);
+               long shifted = size / SIZEOF_VOID_P;
                guint8 *copy_loop_start, *copy_loop_jump;
 
                ppc_load (code, ppc_r0, shifted);
@@ -178,13 +204,39 @@ emit_memcpy (guint8 *code, int size, int dreg, int doffset, int sreg, int soffse
                dreg = ppc_r12;
        }
 #ifdef __mono_ppc64__
+       /* If the hardware has multiple load/store units and the move is long
+          enough to use more than one register, then use load/load/store/store
+          to execute 2 instructions per cycle. */
+       if ((cpu_hw_caps & PPC_MULTIPLE_LS_UNITS) && (dreg != ppc_r12) && (sreg != ppc_r12)) { 
+               while (size >= 16) {
+                       ppc_ldptr (code, ppc_r0, soffset, sreg);
+                       ppc_ldptr (code, ppc_r12, soffset+8, sreg);
+                       ppc_stptr (code, ppc_r0, doffset, dreg);
+                       ppc_stptr (code, ppc_r12, doffset+8, dreg);
+                       size -= 16;
+                       soffset += 16;
+                       doffset += 16; 
+               }
+       }
        while (size >= 8) {
-               ppc_ldptr (code, ppc_r0, soffset, sreg);
-               ppc_stptr (code, ppc_r0, doffset, dreg);
+               ppc_ldr (code, ppc_r0, soffset, sreg);
+               ppc_str (code, ppc_r0, doffset, dreg);
                size -= 8;
                soffset += 8;
                doffset += 8;
        }
+#else
+       if ((cpu_hw_caps & PPC_MULTIPLE_LS_UNITS) && (dreg != ppc_r12) && (sreg != ppc_r12)) { 
+               while (size >= 8) {
+                       ppc_lwz (code, ppc_r0, soffset, sreg);
+                       ppc_lwz (code, ppc_r12, soffset+4, sreg);
+                       ppc_stw (code, ppc_r0, doffset, dreg);
+                       ppc_stw (code, ppc_r12, doffset+4, dreg);
+                       size -= 8;
+                       soffset += 8;
+                       doffset += 8; 
+               }
+       }
 #endif
        while (size >= 4) {
                ppc_lwz (code, ppc_r0, soffset, sreg);
@@ -322,11 +374,12 @@ mono_ppc_is_direct_call_sequence (guint32 *code)
 }
 
 gpointer
-mono_arch_get_vcall_slot (guint8 *code_ptr, gpointer *regs, int *displacement)
+mono_arch_get_vcall_slot (guint8 *code_ptr, mgreg_t *regs, int *displacement)
 {
        char *o = NULL;
        int reg, offset = 0;
        guint32* code = (guint32*)code_ptr;
+       mgreg_t *r = (mgreg_t*)regs;
 
        *displacement = 0;
 
@@ -359,13 +412,7 @@ mono_arch_get_vcall_slot (guint8 *code_ptr, gpointer *regs, int *displacement)
                        reg = (*code >> 16) & 0x1f;
                        g_assert (reg != ppc_r1);
                        /*g_print ("patching reg is %d\n", reg);*/
-                       if (reg >= 13) {
-                               MonoLMF *lmf = (MonoLMF*)((char*)regs + (14 * sizeof (double)) + (13 * sizeof (gpointer)));
-                               /* saved in the MonoLMF structure */
-                               o = (gpointer)lmf->iregs [reg - 13];
-                       } else {
-                               o = regs [reg];
-                       }
+                       o = (gpointer)(gsize)r [reg];
                        break;
                }
        }
@@ -391,7 +438,8 @@ get_delegate_invoke_impl (gboolean has_target, guint32 param_count, guint32 *cod
                ppc_ldptr (code, ppc_r0, G_STRUCT_OFFSET (MonoDelegate, method_ptr), ppc_r3);
 #ifdef PPC_USES_FUNCTION_DESCRIPTOR
                /* it's a function descriptor */
-               ppc_ldx (code, ppc_r0, 0, ppc_r0);
+               /* Can't use ldptr as it doesn't work with r0 */
+               ppc_ldptr_indexed (code, ppc_r0, 0, ppc_r0);
 #endif
                ppc_mtctr (code, ppc_r0);
                ppc_ldptr (code, ppc_r3, G_STRUCT_OFFSET (MonoDelegate, target), ppc_r3);
@@ -411,7 +459,7 @@ get_delegate_invoke_impl (gboolean has_target, guint32 param_count, guint32 *cod
                ppc_ldptr (code, ppc_r0, G_STRUCT_OFFSET (MonoDelegate, method_ptr), ppc_r3);
 #ifdef PPC_USES_FUNCTION_DESCRIPTOR
                /* it's a function descriptor */
-               ppc_ldx (code, ppc_r0, 0, ppc_r0);
+               ppc_ldptr_indexed (code, ppc_r0, 0, ppc_r0);
 #endif
                ppc_mtctr (code, ppc_r0);
                /* slide down the arguments */
@@ -504,20 +552,130 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe
 }
 
 gpointer
-mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, gssize *regs, guint8 *code)
+mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, mgreg_t *regs, guint8 *code)
 {
+       mgreg_t *r = (mgreg_t*)regs;
+
        /* FIXME: handle returning a struct */
        if (MONO_TYPE_ISSTRUCT (sig->ret))
-               return (gpointer)regs [ppc_r4];
-       return (gpointer)regs [ppc_r3];
+               return (gpointer)(gsize)r [ppc_r4];
+       return (gpointer)(gsize)r [ppc_r3];
 }
 
+typedef struct {
+       long int type;
+       long int value;
+} AuxVec;
+
+#ifdef USE_ENVIRON_HACK
+static AuxVec*
+linux_find_auxv (int *count)
+{
+       AuxVec *vec;
+       int c = 0;
+       char **result = __environ;
+       /* Scan over the env vector looking for the ending NULL */
+       for (; *result != NULL; ++result) {
+       }
+       /* Bump the pointer one more step, which should be the auxv. */
+       ++result;
+       vec = (AuxVec *)result;
+       if (vec->type != 22 /*AT_IGNOREPPC*/) {
+               *count = 0;
+               return NULL;
+       }
+       while (vec->type != 0 /*AT_NULL*/) {
+               vec++;
+               c++;
+       }
+       *count = c;
+       return (AuxVec *)result;
+}
+#endif
+
+#define MAX_AUX_ENTRIES 128
+/* 
+ * PPC_FEATURE_POWER4, PPC_FEATURE_POWER5, PPC_FEATURE_POWER5_PLUS, PPC_FEATURE_CELL,
+ * PPC_FEATURE_PA6T, PPC_FEATURE_ARCH_2_05 are considered supporting 2X ISA features
+ */
+#define ISA_2X (0x00080000 | 0x00040000 | 0x00020000 | 0x00010000 | 0x00000800 | 0x00001000)
+
+/* define PPC_FEATURE_64 HWCAP for 64-bit category.  */
+#define ISA_64 0x40000000
+
+/* define PPC_FEATURE_POWER6_EXT HWCAP for power6x mffgpr/mftgpr instructions.  */
+#define ISA_MOVE_FPR_GPR 0x00000200
 /*
  * Initialize the cpu to execute managed code.
  */
 void
 mono_arch_cpu_init (void)
 {
+#ifdef __APPLE__
+       int mib [3];
+       size_t len;
+       mib [0] = CTL_HW;
+       mib [1] = HW_CACHELINE;
+       len = sizeof (cachelinesize);
+       if (sysctl (mib, 2, &cachelinesize, (size_t*)&len, NULL, 0) == -1) {
+               perror ("sysctl");
+               cachelinesize = 128;
+       } else {
+               cachelineinc = cachelinesize;
+       }
+#elif defined(__linux__)
+       AuxVec vec [MAX_AUX_ENTRIES];
+       int i, vec_entries = 0;
+       /* sadly this will work only with 2.6 kernels... */
+       FILE* f = fopen ("/proc/self/auxv", "rb");
+       if (f) {
+               vec_entries = fread (&vec, sizeof (AuxVec), MAX_AUX_ENTRIES, f);
+               fclose (f);
+#ifdef USE_ENVIRON_HACK
+       } else {
+               AuxVec *evec = linux_find_auxv (&vec_entries);
+               if (vec_entries)
+                       memcpy (&vec, evec, sizeof (AuxVec) * MIN (vec_entries, MAX_AUX_ENTRIES));
+#endif
+       }
+       for (i = 0; i < vec_entries; i++) {
+               int type = vec [i].type;
+               if (type == 19) { /* AT_DCACHEBSIZE */
+                       cachelinesize = vec [i].value;
+                       continue;
+               } else if (type == 16) { /* AT_HWCAP */
+                       if (vec [i].value & 0x00002000 /*PPC_FEATURE_ICACHE_SNOOP*/)
+                               cpu_hw_caps |= PPC_ICACHE_SNOOP;
+                       if (vec [i].value & ISA_2X)
+                               cpu_hw_caps |= PPC_ISA_2X;
+                       if (vec [i].value & ISA_64)
+                               cpu_hw_caps |= PPC_ISA_64;
+                       if (vec [i].value & ISA_MOVE_FPR_GPR)
+                               cpu_hw_caps |= PPC_MOVE_FPR_GPR;
+                       continue;
+               } else if (type == 15) { /* AT_PLATFORM */
+                       const char *arch = (char*)vec [i].value;
+                       if (strcmp (arch, "ppc970") == 0 ||
+                                       (strncmp (arch, "power", 5) == 0 && arch [5] >= '4' && arch [5] <= '7'))
+                               cpu_hw_caps |= PPC_MULTIPLE_LS_UNITS;
+                       /*printf ("cpu: %s\n", (char*)vec [i].value);*/
+                       continue;
+               }
+       }
+#elif defined(G_COMPILER_CODEWARRIOR)
+       cachelinesize = 32;
+       cachelineinc = 32;
+#elif defined(MONO_CROSS_COMPILE)
+#else
+//#error Need a way to get cache line size
+#endif
+       if (!cachelinesize)
+               cachelinesize = 32;
+       if (!cachelineinc)
+               cachelineinc = cachelinesize;
+
+       if (mono_cpu_count () > 1)
+               cpu_hw_caps |= PPC_SMP_CAPABLE;
 }
 
 /*
@@ -526,7 +684,11 @@ mono_arch_cpu_init (void)
 void
 mono_arch_init (void)
 {
-       InitializeCriticalSection (&mini_arch_mutex);   
+       InitializeCriticalSection (&mini_arch_mutex);
+
+       ss_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
+       bp_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
+       mono_mprotect (bp_trigger_page, mono_pagesize (), 0);
 }
 
 /*
@@ -626,8 +788,14 @@ mono_arch_get_global_int_regs (MonoCompile *cfg)
        if (cfg->frame_reg != ppc_sp)
                top = 31;
        /* ppc_r13 is used by the system on PPC EABI */
-       for (i = 14; i < top; ++i)
-               regs = g_list_prepend (regs, GUINT_TO_POINTER (i));
+       for (i = 14; i < top; ++i) {
+               /*
+                * Reserve r29 for holding the vtable address for virtual calls in AOT mode,
+                * since the trampolines can clobber r11.
+                */
+               if (!(cfg->compile_aot && i == 29))
+                       regs = g_list_prepend (regs, GUINT_TO_POINTER (i));
+       }
 
        return regs;
 }
@@ -646,11 +814,6 @@ mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
        return 2;
 }
 
-typedef struct {
-       long int type;
-       long int value;
-} AuxVec;
-
 void
 mono_arch_flush_icache (guint8 *code, gint size)
 {
@@ -658,52 +821,13 @@ mono_arch_flush_icache (guint8 *code, gint size)
 #else
        register guint8 *p;
        guint8 *endp, *start;
-       static int cachelinesize = 0;
-       static int cachelineinc = 16;
 
-       if (!cachelinesize) {
-#ifdef __APPLE__
-               int mib [3];
-               size_t len;
-               mib [0] = CTL_HW;
-               mib [1] = HW_CACHELINE;
-               len = sizeof (cachelinesize);
-               if (sysctl(mib, 2, &cachelinesize, (size_t*)&len, NULL, 0) == -1) {
-                       perror ("sysctl");
-                       cachelinesize = 128;
-               } else {
-                       cachelineinc = cachelinesize;
-                       /*g_print ("setting cl size to %d\n", cachelinesize);*/
-               }
-#elif defined(__linux__)
-               /* sadly this will work only with 2.6 kernels... */
-               FILE* f = fopen ("/proc/self/auxv", "rb");
-               if (f) {
-                       AuxVec vec;
-                       while (fread (&vec, sizeof (vec), 1, f) == 1) {
-                               if (vec.type == 19) {
-                                       cachelinesize = vec.value;
-                                       break;
-                               }
-                       }
-                       fclose (f);
-               }
-               if (!cachelinesize)
-                       cachelinesize = 128;
-#elif defined(G_COMPILER_CODEWARRIOR)
-       cachelinesize = 32;
-       cachelineinc = 32;
-#else
-#warning Need a way to get cache line size
-               cachelinesize = 128;
-#endif
-       }
        p = start = code;
        endp = p + size;
        start = (guint8*)((gsize)start & ~(cachelinesize - 1));
        /* use dcbf for smp support, later optimize for UP, see pem._64bit.d20030611.pdf page 211 */
 #if defined(G_COMPILER_CODEWARRIOR)
-       if (1) {
+       if (cpu_hw_caps & PPC_SMP_CAPABLE) {
                for (p = start; p < endp; p += cachelineinc) {
                        asm { dcbf 0, p };
                }
@@ -725,7 +849,19 @@ mono_arch_flush_icache (guint8 *code, gint size)
                isync
        }
 #else
-       if (1) {
+       /* For POWER5/6 with ICACHE_SNOOPing only one icbi in the range is required.
+        * The sync is required to ensure that the store queue is completely empty.
+        * While the icbi performs no cache operations, icbi/isync is required to
+        * kill local prefetch.
+        */
+       if (cpu_hw_caps & PPC_ICACHE_SNOOP) {
+               asm ("sync");
+               asm ("icbi 0,%0;" : : "r"(code) : "memory");
+               asm ("isync");
+               return;
+       }
+       /* use dcbf for smp support, see pem._64bit.d20030611.pdf page 211 */
+       if (cpu_hw_caps & PPC_SMP_CAPABLE) {
                for (p = start; p < endp; p += cachelineinc) {
                        asm ("dcbf 0,%0;" : : "r"(p) : "memory");
                }
@@ -737,9 +873,18 @@ mono_arch_flush_icache (guint8 *code, gint size)
        asm ("sync");
        p = code;
        for (p = start; p < endp; p += cachelineinc) {
-               asm ("icbi 0,%0; sync;" : : "r"(p) : "memory");
+               /* for ISA2.0+ implementations we should not need any extra sync between the
+                * icbi instructions.  Both the 2.0 PEM and the PowerISA-2.05 say this.
+                * So I am not sure which chip had this problem, but it's not an issue on
+                * any of the ISA V2 chips.
+                */
+               if (cpu_hw_caps & PPC_ISA_2X)
+                       asm ("icbi 0,%0;" : : "r"(p) : "memory");
+               else
+                       asm ("icbi 0,%0; sync;" : : "r"(p) : "memory");
        }
-       asm ("sync");
+       if (!(cpu_hw_caps & PPC_ISA_2X))
+               asm ("sync");
        asm ("isync");
 #endif
 #endif
@@ -776,6 +921,7 @@ typedef struct {
        gint32  offset;
        guint32 vtsize; /* in param area */
        guint8  reg;
+       guint8  vtregs; /* number of registers used to pass a RegTypeStructByVal */
        guint8  regtype : 4; /* 0 general, 1 basereg, 2 floating point register, see RegType* */
        guint8  size    : 4; /* 1, 2, 4, 8, or regs used by RegTypeStructByVal */
        guint8  bytes   : 4; /* size in bytes - only valid for
@@ -860,7 +1006,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
 {
        guint i, fr, gr;
        int n = sig->hasthis + sig->param_count;
-       guint32 simpletype;
+       MonoType *simpletype;
        guint32 stack_size = 0;
        CallInfo *cinfo = g_malloc0 (sizeof (CallInfo) + sizeof (ArgInfo) * n);
 
@@ -895,8 +1041,8 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        n++;
                        continue;
                }
-               simpletype = mini_type_get_underlying_type (NULL, sig->params [i])->type;
-               switch (simpletype) {
+               simpletype = mini_type_get_underlying_type (NULL, sig->params [i]);
+               switch (simpletype->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
@@ -931,21 +1077,26 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        n++;
                        break;
                case MONO_TYPE_GENERICINST:
-                       if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
+                       if (!mono_type_generic_inst_is_valuetype (simpletype)) {
                                cinfo->args [n].size = sizeof (gpointer);
                                add_general (&gr, &stack_size, cinfo->args + n, TRUE);
                                n++;
                                break;
                        }
                        /* Fall through */
-               case MONO_TYPE_VALUETYPE: {
+               case MONO_TYPE_VALUETYPE:
+               case MONO_TYPE_TYPEDBYREF: {
                        gint size;
                        MonoClass *klass;
+
                        klass = mono_class_from_mono_type (sig->params [i]);
-                       if (is_pinvoke)
+                       if (simpletype->type == MONO_TYPE_TYPEDBYREF)
+                               size = sizeof (MonoTypedRef);
+                       else if (is_pinvoke)
                            size = mono_class_native_size (klass, NULL);
                        else
                            size = mono_class_value_size (klass, NULL);
+
 #if defined(__APPLE__) || defined(__mono_ppc64__)
                        if ((size == 4 || size == 8) && has_only_a_r48_field (klass)) {
                                cinfo->args [n].size = size;
@@ -971,76 +1122,41 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
 #endif
                        DEBUG(printf ("load %d bytes struct\n",
                                      mono_class_native_size (sig->params [i]->data.klass, NULL)));
+
 #if PPC_PASS_STRUCTS_BY_VALUE
                        {
                                int align_size = size;
-                               int nwords = 0;
+                               int nregs = 0;
                                int rest = PPC_LAST_ARG_REG - gr + 1;
                                int n_in_regs;
+
                                align_size += (sizeof (gpointer) - 1);
                                align_size &= ~(sizeof (gpointer) - 1);
-                               nwords = (align_size + sizeof (gpointer) -1 ) / sizeof (gpointer);
-                               n_in_regs = MIN (rest, nwords);
-                               cinfo->args [n].regtype = RegTypeStructByVal;
-                               if (gr > PPC_LAST_ARG_REG
+                               nregs = (align_size + sizeof (gpointer) -1 ) / sizeof (gpointer);
+                               n_in_regs = MIN (rest, nregs);
+                               if (n_in_regs < 0)
+                                       n_in_regs = 0;
 #ifdef __APPLE__
-                                               /* FIXME: check this */
-                                               || (size >= 3 && size % 4 != 0)
-#endif
-                                               ) {
-                                       cinfo->args [n].size = 0;
-                                       cinfo->args [n].vtsize = nwords;
-                               } else {
-                                       cinfo->args [n].size = n_in_regs;
-                                       cinfo->args [n].vtsize = nwords - n_in_regs;
-                                       cinfo->args [n].reg = gr;
-                               }
-#ifdef __mono_ppc64__
-                               if (nwords == 1 && is_pinvoke)
-                                       cinfo->args [n].bytes = size;
-                               else
-#endif
-                                       cinfo->args [n].bytes = 0;
-                               gr += n_in_regs;
-                               cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
-                               /*g_print ("offset for arg %d at %d\n", n, PPC_STACK_PARAM_OFFSET + stack_size);*/
-                               stack_size += nwords * sizeof (gpointer);
-                       }
-#else
-                       add_general (&gr, &stack_size, cinfo->args + n, TRUE);
-                       cinfo->args [n].regtype = RegTypeStructByAddr;
-                       cinfo->args [n].vtsize = size;
+                               /* FIXME: check this */
+                               if (size >= 3 && size % 4 != 0)
+                                       n_in_regs = 0;
 #endif
-                       n++;
-                       break;
-               }
-               case MONO_TYPE_TYPEDBYREF: {
-                       int size = sizeof (MonoTypedRef);
-                       /* keep in sync or merge with the valuetype case */
-#if PPC_PASS_STRUCTS_BY_VALUE
-                       {
-                               int nwords = (size + sizeof (gpointer) -1 ) / sizeof (gpointer);
                                cinfo->args [n].regtype = RegTypeStructByVal;
-                               if (gr <= PPC_LAST_ARG_REG) {
-                                       int rest = PPC_LAST_ARG_REG - gr + 1;
-                                       int n_in_regs = rest >= nwords? nwords: rest;
-                                       cinfo->args [n].size = n_in_regs;
-                                       cinfo->args [n].vtsize = nwords - n_in_regs;
-                                       cinfo->args [n].reg = gr;
-                                       gr += n_in_regs;
-                               } else {
-                                       cinfo->args [n].size = 0;
-                                       cinfo->args [n].vtsize = nwords;
-                               }
+                               cinfo->args [n].vtregs = n_in_regs;
+                               cinfo->args [n].size = n_in_regs;
+                               cinfo->args [n].vtsize = nregs - n_in_regs;
+                               cinfo->args [n].reg = gr;
+
 #ifdef __mono_ppc64__
-                               if (nwords == 1 && is_pinvoke)
+                               if (nregs == 1 && is_pinvoke)
                                        cinfo->args [n].bytes = size;
                                else
 #endif
                                        cinfo->args [n].bytes = 0;
+                               gr += n_in_regs;
                                cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
                                /*g_print ("offset for arg %d at %d\n", n, PPC_STACK_PARAM_OFFSET + stack_size);*/
-                               stack_size += nwords * sizeof (gpointer);
+                               stack_size += nregs * sizeof (gpointer);
                        }
 #else
                        add_general (&gr, &stack_size, cinfo->args + n, TRUE);
@@ -1105,8 +1221,8 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
        }
 
        {
-               simpletype = mini_type_get_underlying_type (NULL, sig->ret)->type;
-               switch (simpletype) {
+               simpletype = mini_type_get_underlying_type (NULL, sig->ret);
+               switch (simpletype->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
@@ -1136,7 +1252,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                        cinfo->ret.regtype = RegTypeFP;
                        break;
                case MONO_TYPE_GENERICINST:
-                       if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
+                       if (!mono_type_generic_inst_is_valuetype (simpletype)) {
                                cinfo->ret.reg = ppc_r3;
                                break;
                        }
@@ -1222,7 +1338,7 @@ mono_arch_allocate_vars (MonoCompile *m)
        if (m->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE)
                m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
 
-       header = mono_method_get_header (m->method);
+       header = m->header;
 
        /* 
         * We use the frame register also for any method that has
@@ -1587,7 +1703,7 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
                        mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg, FALSE);
                } else
 #endif
-                       for (i = 0; i < ainfo->size; ++i) {
+                       for (i = 0; i < ainfo->vtregs; ++i) {
                                int antipadding = 0;
                                if (ainfo->bytes) {
                                        g_assert (i == 0);
@@ -1678,7 +1794,7 @@ mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean ena
 {
        guchar *code = p;
 
-       ppc_load (code, ppc_r3, cfg->method);
+       ppc_load_ptr (code, ppc_r3, cfg->method);
        ppc_li (code, ppc_r4, 0); /* NULL ebp for now */
        ppc_load_func (code, ppc_r0, func);
        ppc_mtlr (code, ppc_r0);
@@ -1777,7 +1893,7 @@ mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolea
                break;
        }
 
-       ppc_load (code, ppc_r3, cfg->method);
+       ppc_load_ptr (code, ppc_r3, cfg->method);
        ppc_load_func (code, ppc_r0, func);
        ppc_mtlr (code, ppc_r0);
        ppc_blrl (code);
@@ -1874,6 +1990,7 @@ static int
 normalize_opcode (int opcode)
 {
        switch (opcode) {
+#ifndef __mono_ilp32__
        case MONO_PPC_32_64_CASE (OP_LOADI4_MEMBASE, OP_LOADI8_MEMBASE):
                return OP_LOAD_MEMBASE;
        case MONO_PPC_32_64_CASE (OP_LOADI4_MEMINDEX, OP_LOADI8_MEMINDEX):
@@ -1884,6 +2001,7 @@ normalize_opcode (int opcode)
                return OP_STORE_MEMBASE_IMM;
        case MONO_PPC_32_64_CASE (OP_STOREI4_MEMINDEX, OP_STOREI8_MEMINDEX):
                return OP_STORE_MEMINDEX;
+#endif
        case MONO_PPC_32_64_CASE (OP_ISHR_IMM, OP_LSHR_IMM):
                return OP_SHR_IMM;
        case MONO_PPC_32_64_CASE (OP_ISHR_UN_IMM, OP_LSHR_UN_IMM):
@@ -2058,28 +2176,34 @@ mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
 #ifndef __mono_ppc64__
        case OP_ICONV_TO_R4:
        case OP_ICONV_TO_R8: {
-               /* FIXME: change precision for CEE_CONV_R4 */
-               static const guint64 adjust_val = 0x4330000080000000ULL;
-               int msw_reg = mono_alloc_ireg (cfg);
-               int xored = mono_alloc_ireg (cfg);
-               int adj_reg = mono_alloc_freg (cfg);
-               int tmp_reg = mono_alloc_freg (cfg);
-               int basereg = ppc_sp;
-               int offset = -8;
-               if (!ppc_is_imm16 (offset + 4)) {
-                       basereg = mono_alloc_ireg (cfg);
-                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IADD_IMM, basereg, cfg->frame_reg, offset);
+               /* If we have a PPC_FEATURE_64 machine we can avoid
+                  this and use the fcfid instruction.  Otherwise
+                  we are on an old 32-bit chip and have to do this the
+                  hard way.  */
+               if (!(cpu_hw_caps & PPC_ISA_64)) {
+                       /* FIXME: change precision for CEE_CONV_R4 */
+                       static const guint64 adjust_val = 0x4330000080000000ULL;
+                       int msw_reg = mono_alloc_ireg (cfg);
+                       int xored = mono_alloc_ireg (cfg);
+                       int adj_reg = mono_alloc_freg (cfg);
+                       int tmp_reg = mono_alloc_freg (cfg);
+                       int basereg = ppc_sp;
+                       int offset = -8;
+                       if (!ppc_is_imm16 (offset + 4)) {
+                               basereg = mono_alloc_ireg (cfg);
+                               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IADD_IMM, basereg, cfg->frame_reg, offset);
+                       }
+                       MONO_EMIT_NEW_ICONST (cfg, msw_reg, 0x43300000);
+                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset, msw_reg);
+                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_XOR_IMM, xored, ins->sreg1, 0x80000000);
+                       MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset + 4, xored);
+                       MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, (gpointer)&adjust_val);
+                       MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, basereg, offset);
+                       MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg);
+                       if (ins->opcode == OP_ICONV_TO_R4)
+                               MONO_EMIT_NEW_UNALU (cfg, OP_FCONV_TO_R4, ins->dreg, ins->dreg);
+                       ins->opcode = OP_NOP;
                }
-               MONO_EMIT_NEW_ICONST (cfg, msw_reg, 0x43300000);
-               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset, msw_reg);
-               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_XOR_IMM, xored, ins->sreg1, 0x80000000);
-               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset + 4, xored);
-               MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, (gpointer)&adjust_val);
-               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, basereg, offset);
-               MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg);
-               if (ins->opcode == OP_ICONV_TO_R4)
-                       MONO_EMIT_NEW_UNALU (cfg, OP_FCONV_TO_R4, ins->dreg, ins->dreg);
-               ins->opcode = OP_NOP;
                break;
        }
 #endif
@@ -2395,7 +2519,7 @@ loop_start:
                case OP_XOR_IMM: {
                        gboolean is_imm = ((ins->inst_imm & 0xffff0000) && (ins->inst_imm & 0xffff));
 #ifdef __mono_ppc64__
-                       if (ins->inst_imm & 0xffffffff00000000UL)
+                       if (ins->inst_imm & 0xffffffff00000000ULL)
                                is_imm = TRUE;
 #endif
                        if (is_imm) {
@@ -2563,7 +2687,7 @@ emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size,
        if (ppc_is_imm16 (offset + sub_offset)) {
                ppc_stfd (code, ppc_f0, offset, cfg->frame_reg);
                if (size == 8)
-                       ppc_ldptr (code, dreg, offset + sub_offset, cfg->frame_reg);
+                       ppc_ldr (code, dreg, offset + sub_offset, cfg->frame_reg);
                else
                        ppc_lwz (code, dreg, offset + sub_offset, cfg->frame_reg);
        } else {
@@ -2571,7 +2695,7 @@ emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size,
                ppc_add (code, dreg, dreg, cfg->frame_reg);
                ppc_stfd (code, ppc_f0, 0, dreg);
                if (size == 8)
-                       ppc_ldptr (code, dreg, sub_offset, dreg);
+                       ppc_ldr (code, dreg, sub_offset, dreg);
                else
                        ppc_lwz (code, dreg, sub_offset, dreg);
        }
@@ -2700,14 +2824,14 @@ handle_thunk (int absolute, guchar *code, const guchar *target) {
 static void
 patch_ins (guint8 *code, guint32 ins)
 {
-       *(guint32*)code = ins;
+       *(guint32*)code = GUINT32_TO_BE (ins);
        mono_arch_flush_icache (code, 4);
 }
 
 void
 ppc_patch_full (guchar *code, const guchar *target, gboolean is_fd)
 {
-       guint32 ins = *(guint32*)code;
+       guint32 ins = GUINT32_FROM_BE (*(guint32*)code);
        guint32 prim = ins >> 26;
        guint32 ovf;
 
@@ -2815,7 +2939,7 @@ ppc_patch_full (guchar *code, const guchar *target, gboolean is_fd)
 
                /* FIXME: make this thread safe */
                /* FIXME: we're assuming we're using r11 here */
-               ppc_load_sequence (code, ppc_r11, target);
+               ppc_load_ptr_sequence (code, ppc_r11, target);
                mono_arch_flush_icache ((guint8*)seq, 28);
 #else
                guint32 *seq;
@@ -2980,7 +3104,7 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
                                NOT_IMPLEMENTED;
                        } else
 #endif
-                               for (j = 0; j < ainfo->size; ++j) {
+                               for (j = 0; j < ainfo->vtregs; ++j) {
                                        ppc_ldptr (code, ainfo->reg + j,
                                                        inst->inst_offset + j * sizeof (gpointer),
                                                        inst->inst_basereg);
@@ -3162,6 +3286,33 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_NOT_REACHED:
                case OP_NOT_NULL:
                        break;
+               case OP_SEQ_POINT: {
+                       int i;
+
+                       if (cfg->compile_aot)
+                               NOT_IMPLEMENTED;
+
+                       /* 
+                        * Read from the single stepping trigger page. This will cause a
+                        * SIGSEGV when single stepping is enabled.
+                        * We do this _before_ the breakpoint, so single stepping after
+                        * a breakpoint is hit will step to the next IL offset.
+                        */
+                       if (ins->flags & MONO_INST_SINGLE_STEP_LOC) {
+                               ppc_load (code, ppc_r11, (gsize)ss_trigger_page); /* r11 = address of the trigger page */
+                               ppc_ldptr (code, ppc_r11, 0, ppc_r11); /* the read itself; the loaded value is not used here */
+                       }
+
+                       mono_add_seq_point (cfg, bb, ins, code - cfg->native_code); /* recorded offset points at the nop placeholder below */
+
+                       /* 
+                        * A placeholder for a possible breakpoint inserted by
+                        * mono_arch_set_breakpoint ().
+                        */
+                       for (i = 0; i < BREAKPOINT_SIZE / 4; ++i) /* BREAKPOINT_SIZE is in bytes; one nop is emitted per 4 bytes */
+                               ppc_nop (code);
+                       break;
+               }
                case OP_TLS_GET:
                        emit_tls_access (code, ins->dreg, ins->inst_offset);
                        break;
@@ -3182,34 +3333,59 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_stb (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_stbx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_stb (code, ins->sreg1, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_stbx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_STOREI2_MEMBASE_REG:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_sth (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_sthx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_sth (code, ins->sreg1, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_sthx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_STORE_MEMBASE_REG:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_stptr (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
+                       } else {
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_stptr (code, ins->sreg1, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_stptr_indexed (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               }
+                       }
+                       break;
+#ifdef __mono_ilp32__
+               case OP_STOREI8_MEMBASE_REG:
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_str (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
                                ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_stptr_indexed (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               ppc_str_indexed (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
                        }
                        break;
+#endif
                case OP_STOREI1_MEMINDEX:
-                       ppc_stbx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       ppc_stbx (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
                case OP_STOREI2_MEMINDEX:
-                       ppc_sthx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       ppc_sthx (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
                case OP_STORE_MEMINDEX:
-                       ppc_stptr_indexed (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       ppc_stptr_indexed (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
                case OP_LOADU4_MEM:
                        g_assert_not_reached ();
@@ -3218,8 +3394,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_ldptr (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_ldptr_indexed (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) {
+                                       ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset));
+                                       ppc_ldptr (code, ins->dreg, ins->inst_offset, ins->dreg);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_ldptr_indexed (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_LOADI4_MEMBASE:
@@ -3227,8 +3408,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lwa (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_lwax (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) {
+                                       ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset));
+                                       ppc_lwa (code, ins->dreg, ins->inst_offset, ins->dreg);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lwax (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               }
                        }
                        break;
 #endif
@@ -3236,8 +3422,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lwz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_lwzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) {
+                                       ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset));
+                                       ppc_lwz (code, ins->dreg, ins->inst_offset, ins->dreg);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lwzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_LOADI1_MEMBASE:
@@ -3245,8 +3436,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lbz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_lbzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) {
+                                       ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset));
+                                       ppc_lbz (code, ins->dreg, ins->inst_offset, ins->dreg);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lbzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               }
                        }
                        if (ins->opcode == OP_LOADI1_MEMBASE)
                                ppc_extsb (code, ins->dreg, ins->dreg);
@@ -3255,40 +3451,60 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lhz (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_lhzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) {
+                                       ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset));
+                                       ppc_lhz (code, ins->dreg, ins->inst_offset, ins->dreg);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lhzx (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_LOADI2_MEMBASE:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lha (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
+                       } else {
+                               if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) {
+                                       ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset));
+                                       ppc_lha (code, ins->dreg, ins->inst_offset, ins->dreg);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lhax (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               }
+                       }
+                       break;
+#ifdef __mono_ilp32__
+               case OP_LOADI8_MEMBASE:
+                       if (ppc_is_imm16 (ins->inst_offset)) {
+                               ppc_ldr (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
                                ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_lhax (code, ins->dreg, ins->inst_basereg, ppc_r0);
+                               ppc_ldr_indexed (code, ins->dreg, ins->inst_basereg, ppc_r0);
                        }
                        break;
+#endif
                case OP_LOAD_MEMINDEX:
-                       ppc_ldptr_indexed (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_ldptr_indexed (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADI4_MEMINDEX:
 #ifdef __mono_ppc64__
-                       ppc_lwax (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lwax (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
 #endif
                case OP_LOADU4_MEMINDEX:
-                       ppc_lwzx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lwzx (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADU2_MEMINDEX:
-                       ppc_lhzx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lhzx (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADI2_MEMINDEX:
-                       ppc_lhax (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lhax (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADU1_MEMINDEX:
-                       ppc_lbzx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lbzx (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADI1_MEMINDEX:
-                       ppc_lbzx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lbzx (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        ppc_extsb (code, ins->dreg, ins->dreg);
                        break;
                case OP_ICONV_TO_I1:
@@ -3310,7 +3526,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_COMPARE:
                case OP_ICOMPARE:
                CASE_PPC64 (OP_LCOMPARE)
-                       L = (sizeof (gpointer) == 4 || ins->opcode == OP_ICOMPARE) ? 0 : 1;
+                       L = (sizeof (mgreg_t) == 4 || ins->opcode == OP_ICOMPARE) ? 0 : 1;
                        next = ins->next;
                        if (next && compare_opcode_is_unsigned (next->opcode))
                                ppc_cmpl (code, 0, L, ins->sreg1, ins->sreg2);
@@ -3320,7 +3536,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_COMPARE_IMM:
                case OP_ICOMPARE_IMM:
                CASE_PPC64 (OP_LCOMPARE_IMM)
-                       L = (sizeof (gpointer) == 4 || ins->opcode == OP_ICOMPARE_IMM) ? 0 : 1;
+                       L = (sizeof (mgreg_t) == 4 || ins->opcode == OP_ICOMPARE_IMM) ? 0 : 1;
                        next = ins->next;
                        if (next && compare_opcode_is_unsigned (next->opcode)) {
                                if (ppc_is_uimm16 (ins->inst_imm)) {
@@ -3631,9 +3847,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_multiply (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_ICONST:
-               CASE_PPC64 (OP_I8CONST)
                        ppc_load (code, ins->dreg, ins->inst_c0);
                        break;
+               case OP_I8CONST: {
+                       ppc_load (code, ins->dreg, ins->inst_l);
+                       break;
+               }
                case OP_LOAD_GOTADDR:
                        /* The PLT implementation depends on this */
                        g_assert (ins->dreg == ppc_r30);
@@ -3701,8 +3920,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (cfg->stack_usage)) {
                                ppc_addi (code, ppc_r11, cfg->frame_reg, cfg->stack_usage);
                        } else {
-                               ppc_load (code, ppc_r11, cfg->stack_usage);
-                               ppc_add (code, ppc_r11, cfg->frame_reg, ppc_r11);
+                               /* cfg->stack_usage is an int, so we can use
+                                * an addis/addi sequence here even in 64-bit.  */
+                               ppc_addis (code, ppc_r11, cfg->frame_reg, ppc_ha(cfg->stack_usage));
+                               ppc_addi (code, ppc_r11, ppc_r11, cfg->stack_usage);
                        }
                        if (!cfg->method->save_lmf) {
                                /*for (i = 31; i >= 14; --i) {
@@ -3800,7 +4021,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_VCALL2_MEMBASE:
                case OP_VOIDCALL_MEMBASE:
                case OP_CALL_MEMBASE:
-                       ppc_ldptr (code, ppc_r0, ins->inst_offset, ins->sreg1);
+                       if (cfg->compile_aot && ins->sreg1 == ppc_r11) {
+                               /* The trampolines clobber r11, so keep the base register in r29 instead */
+                               ppc_mr (code, ppc_r29, ins->sreg1);
+                               ppc_ldptr (code, ppc_r0, ins->inst_offset, ppc_r29);
+                       } else {
+                               ppc_ldptr (code, ppc_r0, ins->inst_offset, ins->sreg1);
+                       }
                        ppc_mtlr (code, ppc_r0);
                        ppc_blrl (code);
                        /* FIXME: this should be handled somewhere else in the new jit */
@@ -4017,16 +4244,26 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_stfd (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_stfdx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_stfd (code, ins->sreg1, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_stfdx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_LOADR8_MEMBASE:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lfd (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_lfdx (code, ins->dreg, ins->inst_destbasereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_lfd (code, ins->dreg, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lfdx (code, ins->dreg, ins->inst_destbasereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_STORER4_MEMBASE_REG:
@@ -4034,30 +4271,40 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_stfs (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_stfsx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_stfs (code, ins->sreg1, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_stfsx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_LOADR4_MEMBASE:
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_lfs (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               ppc_load (code, ppc_r0, ins->inst_offset);
-                               ppc_lfsx (code, ins->dreg, ins->inst_destbasereg, ppc_r0);
+                               if (ppc_is_imm32 (ins->inst_offset)) {
+                                       ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset));
+                                       ppc_lfs (code, ins->dreg, ins->inst_offset, ppc_r12);
+                               } else {
+                                       ppc_load (code, ppc_r0, ins->inst_offset);
+                                       ppc_lfsx (code, ins->dreg, ins->inst_destbasereg, ppc_r0);
+                               }
                        }
                        break;
                case OP_LOADR4_MEMINDEX:
-                       ppc_lfsx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lfsx (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_LOADR8_MEMINDEX:
-                       ppc_lfdx (code, ins->dreg, ins->sreg2, ins->inst_basereg);
+                       ppc_lfdx (code, ins->dreg, ins->inst_basereg, ins->sreg2);
                        break;
                case OP_STORER4_MEMINDEX:
                        ppc_frsp (code, ins->sreg1, ins->sreg1);
-                       ppc_stfsx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       ppc_stfsx (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
                case OP_STORER8_MEMINDEX:
-                       ppc_stfdx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg);
+                       ppc_stfdx (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
                case CEE_CONV_R_UN:
                case CEE_CONV_R4: /* FIXME: change precision */
@@ -4221,7 +4468,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_JUMP_TABLE:
                        mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
 #ifdef __mono_ppc64__
-                       ppc_load_sequence (code, ins->dreg, (gulong)0x0f0f0f0f0f0f0f0fL);
+                       ppc_load_sequence (code, ins->dreg, (guint64)0x0f0f0f0f0f0f0f0fLL);
 #else
                        ppc_load_sequence (code, ins->dreg, (gulong)0x0f0f0f0fL);
 #endif
@@ -4248,8 +4495,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        } else {
                                tmp = ins->sreg1;
                        }
-                       ppc_stptr (code, tmp, -8, ppc_r1);
-                       ppc_lfd (code, ins->dreg, -8, ppc_r1);
+                       if (cpu_hw_caps & PPC_MOVE_FPR_GPR) {
+                               ppc_mffgpr (code, ins->dreg, tmp);
+                       } else {
+                               ppc_str (code, tmp, -8, ppc_r1);
+                               ppc_lfd (code, ins->dreg, -8, ppc_r1);
+                       }
                        ppc_fcfid (code, ins->dreg, ins->dreg);
                        if (ins->opcode == OP_ICONV_TO_R4 || ins->opcode == OP_LCONV_TO_R4)
                                ppc_frsp (code, ins->dreg, ins->dreg);
@@ -4330,6 +4581,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_mr (code, ins->dreg, ppc_r0);
                        break;
                }
+#else
+               case OP_ICONV_TO_R4:
+               case OP_ICONV_TO_R8: {
+                       if (cpu_hw_caps & PPC_ISA_64) { /* NOTE(review): nothing is emitted when PPC_ISA_64 is unset - presumably the opcode is lowered elsewhere in that case; confirm */
+                               ppc_srawi(code, ppc_r0, ins->sreg1, 31); /* r0 = sreg1 shifted right arithmetically by 31: the sign word */
+                               ppc_stw (code, ppc_r0, -8, ppc_r1); /* sign word goes to -8(r1) */
+                               ppc_stw (code, ins->sreg1, -4, ppc_r1); /* the value itself goes to -4(r1) */
+                               ppc_lfd (code, ins->dreg, -8, ppc_r1); /* reload both words as one 8-byte FP register image */
+                               ppc_fcfid (code, ins->dreg, ins->dreg); /* convert that integer image to floating point */
+                               if (ins->opcode == OP_ICONV_TO_R4)
+                                       ppc_frsp (code, ins->dreg, ins->dreg); /* extra rounding step only for the R4 result */
+                               }
+                       break;
+               }
 #endif
                case OP_ATOMIC_CAS_I4:
                CASE_PPC64 (OP_ATOMIC_CAS_I8) {
@@ -4395,10 +4660,11 @@ mono_arch_register_lowlevel_calls (void)
 #ifdef __mono_ppc64__
 #define patch_load_sequence(ip,val) do {\
                guint16 *__load = (guint16*)(ip);       \
-               __load [1] = (((guint64)(val)) >> 48) & 0xffff; \
-               __load [3] = (((guint64)(val)) >> 32) & 0xffff; \
-               __load [7] = (((guint64)(val)) >> 16) & 0xffff; \
-               __load [9] =  ((guint64)(val))        & 0xffff; \
+               g_assert (sizeof (val) == sizeof (gsize)); \
+               __load [1] = (((guint64)(gsize)(val)) >> 48) & 0xffff;  \
+               __load [3] = (((guint64)(gsize)(val)) >> 32) & 0xffff;  \
+               __load [7] = (((guint64)(gsize)(val)) >> 16) & 0xffff;  \
+               __load [9] =  ((guint64)(gsize)(val))        & 0xffff;  \
        } while (0)
 #else
 #define patch_load_sequence(ip,val) do {\
@@ -4564,19 +4830,18 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
                tracing = 1;
 
-       /* We currently emit unwind info for aot, but don't use it */
-       mono_emit_unwind_op_def_cfa (cfg, code, ppc_r1, 0);
-
        sig = mono_method_signature (method);
        cfg->code_size = MONO_PPC_32_64_CASE (260, 384) + sig->param_count * 20;
        code = cfg->native_code = g_malloc (cfg->code_size);
 
        cfa_offset = 0;
 
+       /* We currently emit unwind info for aot, but don't use it */
+       mono_emit_unwind_op_def_cfa (cfg, code, ppc_r1, 0);
+
        if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
                ppc_mflr (code, ppc_r0);
                ppc_str (code, ppc_r0, PPC_RET_ADDR_OFFSET, ppc_sp);
-
                mono_emit_unwind_op_offset (cfg, code, ppc_lr, PPC_RET_ADDR_OFFSET);
        }
 
@@ -4586,7 +4851,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        if (!method->save_lmf) {
                for (i = 31; i >= 13; --i) {
                        if (cfg->used_int_regs & (1 << i)) {
-                               pos += sizeof (gulong);
+                               pos += sizeof (mgreg_t);
                        }
                }
        } else {
@@ -4679,7 +4944,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        else if (ainfo->regtype == RegTypeFP)
                                ppc_fmr (code, inst->dreg, ainfo->reg);
                        else if (ainfo->regtype == RegTypeBase) {
-                               ppc_ldptr (code, ppc_r11, 0, ppc_sp);
+                               ppc_ldr (code, ppc_r11, 0, ppc_sp);
                                ppc_ldptr (code, inst->dreg, ainfo->offset, ppc_r11);
                        } else
                                g_assert_not_reached ();
@@ -4694,25 +4959,48 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        if (ppc_is_imm16 (inst->inst_offset)) {
                                                ppc_stb (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                                        } else {
-                                               ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_stbx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_stb (code, ainfo->reg, inst->inst_offset, ppc_r11); /* displacement first, base reg (r11) last, as in the imm16 case */
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_stbx (code, ainfo->reg, inst->inst_basereg, ppc_r11);
+                                               }
                                        }
                                        break;
                                case 2:
                                        if (ppc_is_imm16 (inst->inst_offset)) {
                                                ppc_sth (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                                        } else {
-                                               ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_sthx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_sth (code, ainfo->reg, inst->inst_offset, ppc_r11);
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_sthx (code, ainfo->reg, inst->inst_basereg, ppc_r11);
+                                               }
                                        }
                                        break;
 #ifdef __mono_ppc64__
                                case 4:
                                        if (ppc_is_imm16 (inst->inst_offset)) {
                                                ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
+                                       } else {
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_stw (code, ainfo->reg, inst->inst_offset, ppc_r11);
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_stwx (code, ainfo->reg, inst->inst_basereg, ppc_r11);
+                                               }
+                                       }
+                                       break;
+                               case 8:
+                                       if (ppc_is_imm16 (inst->inst_offset)) {
+                                               ppc_str (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                                        } else {
                                                ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_stwx (code, ainfo->reg, ppc_r11, inst->inst_basereg);
+                                               ppc_str_indexed (code, ainfo->reg, ppc_r11, inst->inst_basereg);
                                        }
                                        break;
 #else
@@ -4721,8 +5009,8 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                                ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                                                ppc_stw (code, ainfo->reg + 1, inst->inst_offset + 4, inst->inst_basereg);
                                        } else {
-                                               ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_add (code, ppc_r11, ppc_r11, inst->inst_basereg);
+                                               ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                               ppc_addi (code, ppc_r11, ppc_r11, inst->inst_offset);
                                                ppc_stw (code, ainfo->reg, 0, ppc_r11);
                                                ppc_stw (code, ainfo->reg + 1, 4, ppc_r11);
                                        }
@@ -4732,50 +5020,84 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        if (ppc_is_imm16 (inst->inst_offset)) {
                                                ppc_stptr (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
                                        } else {
-                                               ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_stptr_indexed (code, ainfo->reg, ppc_r11, inst->inst_basereg);
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_stptr (code, ainfo->reg, inst->inst_offset, ppc_r11); /* displacement first, base reg (r11) last, as in the imm16 case */
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_stptr_indexed (code, ainfo->reg, inst->inst_basereg, ppc_r11);
+                                               }
                                        }
                                        break;
                                }
                        } else if (ainfo->regtype == RegTypeBase) {
+                               g_assert (ppc_is_imm16 (ainfo->offset));
                                /* load the previous stack pointer in r11 */
-                               ppc_ldptr (code, ppc_r11, 0, ppc_sp);
+                               ppc_ldr (code, ppc_r11, 0, ppc_sp);
                                ppc_ldptr (code, ppc_r0, ainfo->offset, ppc_r11);
                                switch (ainfo->size) {
                                case 1:
                                        if (ppc_is_imm16 (inst->inst_offset)) {
                                                ppc_stb (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
                                        } else {
-                                               ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_stbx (code, ppc_r0, ppc_r11, inst->inst_basereg);
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_stb (code, ppc_r0, inst->inst_offset, ppc_r11);
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_stbx (code, ppc_r0, inst->inst_basereg, ppc_r11);
+                                               }
                                        }
                                        break;
                                case 2:
                                        if (ppc_is_imm16 (inst->inst_offset)) {
                                                ppc_sth (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
                                        } else {
-                                               ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_sthx (code, ppc_r0, ppc_r11, inst->inst_basereg);
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_sth (code, ppc_r0, inst->inst_offset, ppc_r11);
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_sthx (code, ppc_r0, inst->inst_basereg, ppc_r11);
+                                               }
                                        }
                                        break;
 #ifdef __mono_ppc64__
                                case 4:
                                        if (ppc_is_imm16 (inst->inst_offset)) {
                                                ppc_stw (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
+                                       } else {
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_stw (code, ppc_r0, inst->inst_offset, ppc_r11);
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_stwx (code, ppc_r0, inst->inst_basereg, ppc_r11);
+                                               }
+                                       }
+                                       break;
+                               case 8:
+                                       if (ppc_is_imm16 (inst->inst_offset)) {
+                                               ppc_str (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
                                        } else {
                                                ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_stwx (code, ppc_r0, ppc_r11, inst->inst_basereg);
+                                               ppc_str_indexed (code, ppc_r0, ppc_r11, inst->inst_basereg);
                                        }
                                        break;
 #else
                                case 8:
+                                       g_assert (ppc_is_imm16 (ainfo->offset + 4));
                                        if (ppc_is_imm16 (inst->inst_offset + 4)) {
                                                ppc_stw (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
                                                ppc_lwz (code, ppc_r0, ainfo->offset + 4, ppc_r11);
                                                ppc_stw (code, ppc_r0, inst->inst_offset + 4, inst->inst_basereg);
                                        } else {
-                                               /* FIXME */
-                                               g_assert_not_reached ();
+                                               /* use r12 to load the 2nd half of the long before we clobber r11.  */
+                                               ppc_lwz (code, ppc_r12, ainfo->offset + 4, ppc_r11);
+                                               ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                               ppc_addi (code, ppc_r11, ppc_r11, inst->inst_offset);
+                                               ppc_stw (code, ppc_r0, 0, ppc_r11);
+                                               ppc_stw (code, ppc_r12, 4, ppc_r11);
                                        }
                                        break;
 #endif
@@ -4783,8 +5105,13 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        if (ppc_is_imm16 (inst->inst_offset)) {
                                                ppc_stptr (code, ppc_r0, inst->inst_offset, inst->inst_basereg);
                                        } else {
-                                               ppc_load (code, ppc_r11, inst->inst_offset);
-                                               ppc_stptr_indexed (code, ppc_r0, ppc_r11, inst->inst_basereg);
+                                               if (ppc_is_imm32 (inst->inst_offset)) {
+                                                       ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset));
+                                                       ppc_stptr (code, ppc_r0, inst->inst_offset, ppc_r11); /* displacement first, base reg (r11) last, as in the imm16 case */
+                                               } else {
+                                                       ppc_load (code, ppc_r11, inst->inst_offset);
+                                                       ppc_stptr_indexed (code, ppc_r0, inst->inst_basereg, ppc_r11);
+                                               }
                                        }
                                        break;
                                }
@@ -4802,11 +5129,11 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                int cur_reg;
                                int size = 0;
                                g_assert (ppc_is_imm16 (inst->inst_offset));
-                               g_assert (ppc_is_imm16 (inst->inst_offset + ainfo->size * sizeof (gpointer)));
+                               g_assert (ppc_is_imm16 (inst->inst_offset + ainfo->vtregs * sizeof (gpointer)));
                                /* FIXME: what if there is no class? */
                                if (sig->pinvoke && mono_class_from_mono_type (inst->inst_vtype))
                                        size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), NULL);
-                               for (cur_reg = 0; cur_reg < ainfo->size; ++cur_reg) {
+                               for (cur_reg = 0; cur_reg < ainfo->vtregs; ++cur_reg) {
 #if __APPLE__
                                        /*
                                         * Darwin handles 1 and 2 byte
@@ -4843,7 +5170,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        if (ainfo->bytes)
                                                NOT_IMPLEMENTED;
                                        /* load the previous stack pointer in r11 (r0 gets overwritten by the memcpy) */
-                                       ppc_ldptr (code, ppc_r11, 0, ppc_sp);
+                                       ppc_ldr (code, ppc_r11, 0, ppc_sp);
                                        if ((size & MONO_PPC_32_64_CASE (3, 7)) != 0) {
                                                code = emit_memcpy (code, size - soffset,
                                                        inst->inst_basereg, doffset,
@@ -4858,7 +5185,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                /* if it was originally a RegTypeBase */
                                if (ainfo->offset) {
                                        /* load the previous stack pointer in r11 */
-                                       ppc_ldptr (code, ppc_r11, 0, ppc_sp);
+                                       ppc_ldr (code, ppc_r11, 0, ppc_sp);
                                        ppc_ldptr (code, ppc_r11, ainfo->offset, ppc_r11);
                                } else {
                                        ppc_mr (code, ppc_r11, ainfo->reg);
@@ -4883,7 +5210,11 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        }
 
        if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
-               ppc_load (code, ppc_r3, cfg->domain);
+               if (cfg->compile_aot)
+                       /* AOT code is only used in the root domain */
+                       ppc_load_ptr (code, ppc_r3, 0);
+               else
+                       ppc_load_ptr (code, ppc_r3, cfg->domain);
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
                if ((FORCE_INDIR_CALL || cfg->method->dynamic) && !cfg->compile_aot) {
                        ppc_load_func (code, ppc_r0, 0);
@@ -4900,6 +5231,10 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        if (tls_mode != TLS_MODE_NPTL && G_STRUCT_OFFSET (MonoJitTlsData, lmf))
                                ppc_addi (code, ppc_r3, ppc_r3, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
                } else {
+                       if (cfg->compile_aot) {
+                               /* Compute the got address which is needed by the PLT entry */
+                               code = mono_arch_emit_load_got_addr (cfg->native_code, code, cfg, NULL);
+                       }
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                     (gpointer)"mono_get_lmf_addr");
                        if ((FORCE_INDIR_CALL || cfg->method->dynamic) && !cfg->compile_aot) {
@@ -4930,7 +5265,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        // FIXME:
                        ppc_load (code, ppc_r0, 0);
                else
-                       ppc_load (code, ppc_r0, method);
+                       ppc_load_ptr (code, ppc_r0, method);
                ppc_stptr (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, method), ppc_r11);
                ppc_stptr (code, ppc_sp, G_STRUCT_OFFSET(MonoLMF, ebp), ppc_r11);
                /* save the current IP */
@@ -4940,7 +5275,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                } else {
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
 #ifdef __mono_ppc64__
-                       ppc_load_sequence (code, ppc_r0, (gulong)0x0101010101010101L);
+                       ppc_load_sequence (code, ppc_r0, (guint64)0x0101010101010101LL);
 #else
                        ppc_load_sequence (code, ppc_r0, (gulong)0x01010101L);
 #endif
@@ -5008,7 +5343,7 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                 * we didn't actually change them (idea from Zoltan).
                 */
                /* restore iregs */
-               ppc_load_multiple_regs (code, ppc_r13, G_STRUCT_OFFSET(MonoLMF, iregs), ppc_r11);
+               ppc_ldr_multiple (code, ppc_r13, G_STRUCT_OFFSET(MonoLMF, iregs), ppc_r11);
                /* restore fregs */
                /*for (i = 14; i < 32; i++) {
                        ppc_lfd (code, i, G_STRUCT_OFFSET(MonoLMF, fregs) + ((i-14) * sizeof (gdouble)), ppc_r11);
@@ -5016,7 +5351,7 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                g_assert (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET));
                /* use the saved copy of the frame reg in r8 */
                if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
-                       ppc_ldptr (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, ppc_r8);
+                       ppc_ldr (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, ppc_r8);
                        ppc_mtlr (code, ppc_r0);
                }
                ppc_addic (code, ppc_sp, ppc_r8, cfg->stack_usage);
@@ -5024,10 +5359,10 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
                        long return_offset = cfg->stack_usage + PPC_RET_ADDR_OFFSET;
                        if (ppc_is_imm16 (return_offset)) {
-                               ppc_ldptr (code, ppc_r0, return_offset, cfg->frame_reg);
+                               ppc_ldr (code, ppc_r0, return_offset, cfg->frame_reg);
                        } else {
                                ppc_load (code, ppc_r11, return_offset);
-                               ppc_ldptr_indexed (code, ppc_r0, cfg->frame_reg, ppc_r11);
+                               ppc_ldr_indexed (code, ppc_r0, cfg->frame_reg, ppc_r11);
                        }
                        ppc_mtlr (code, ppc_r0);
                }
@@ -5035,15 +5370,15 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                        int offset = cfg->stack_usage;
                        for (i = 13; i <= 31; i++) {
                                if (cfg->used_int_regs & (1 << i))
-                                       offset -= sizeof (gulong);
+                                       offset -= sizeof (mgreg_t);
                        }
                        if (cfg->frame_reg != ppc_sp)
                                ppc_mr (code, ppc_r11, cfg->frame_reg);
                        /* note r31 (possibly the frame register) is restored last */
                        for (i = 13; i <= 31; i++) {
                                if (cfg->used_int_regs & (1 << i)) {
-                                       ppc_ldptr (code, i, offset, cfg->frame_reg);
-                                       offset += sizeof (gulong);
+                                       ppc_ldr (code, i, offset, cfg->frame_reg);
+                                       offset += sizeof (mgreg_t);
                                }
                        }
                        if (cfg->frame_reg != ppc_sp)
@@ -5051,13 +5386,13 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                        else
                                ppc_addi (code, ppc_sp, ppc_sp, cfg->stack_usage);
                } else {
-                       ppc_load (code, ppc_r11, cfg->stack_usage);
+                       ppc_load32 (code, ppc_r11, cfg->stack_usage);
                        if (cfg->used_int_regs) {
                                ppc_add (code, ppc_r11, cfg->frame_reg, ppc_r11);
                                for (i = 31; i >= 13; --i) {
                                        if (cfg->used_int_regs & (1 << i)) {
-                                               pos += sizeof (gulong);
-                                               ppc_ldptr (code, i, -pos, ppc_r11);
+                                               pos += sizeof (mgreg_t);
+                                               ppc_ldr (code, i, -pos, ppc_r11);
                                        }
                                }
                                ppc_mr (code, ppc_sp, ppc_r11);
@@ -5111,13 +5446,12 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
      
        /* 
         * make sure we have enough space for exceptions
-        * 28 is the simulated call to throw_corlib_exception
         */
        for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
                if (patch_info->type == MONO_PATCH_INFO_EXC) {
                        i = exception_id_by_name (patch_info->data.target);
                        if (!exc_throw_found [i]) {
-                               max_epilog_size += 28;
+                               max_epilog_size += (2 * PPC_LOAD_SEQUENCE_LENGTH) + 5 * 4;
                                exc_throw_found [i] = TRUE;
                        }
                } else if (patch_info->type == MONO_PATCH_INFO_BB_OVF)
@@ -5126,7 +5460,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
                        MonoOvfJump *ovfj = (MonoOvfJump*)patch_info->data.target;
                        i = exception_id_by_name (ovfj->data.exception);
                        if (!exc_throw_found [i]) {
-                               max_epilog_size += 28;
+                               max_epilog_size += (2 * PPC_LOAD_SEQUENCE_LENGTH) + 5 * 4;
                                exc_throw_found [i] = TRUE;
                        }
                        max_epilog_size += 8;
@@ -5254,6 +5588,10 @@ setup_tls_access (void)
        guint32 cmplwi_1023, li_0x48, blr_ins;
 #endif
 
+#ifdef TARGET_PS3
+       tls_mode = TLS_MODE_FAILED;
+#endif
+
        if (tls_mode == TLS_MODE_FAILED)
                return;
        if (g_getenv ("MONO_NO_TLS")) {
@@ -5268,9 +5606,7 @@ setup_tls_access (void)
                conf_size = confstr ( _CS_GNU_LIBPTHREAD_VERSION, confbuf, sizeof(confbuf));
                if ((conf_size > 4) && (strncmp (confbuf, "NPTL", 4) == 0))
                        tls_mode = TLS_MODE_NPTL;
-               else
-                       tls_mode = TLS_MODE_LTHREADS;
-#else
+#elif !defined(TARGET_PS3)
                ins = (guint32*)pthread_getspecific;
                /* uncond branch to the real method */
                if ((*ins >> 26) == 18) {
@@ -5344,6 +5680,11 @@ setup_tls_access (void)
                }
 #endif
        }
+#ifndef TARGET_PS3
+       if (tls_mode == TLS_MODE_DETECT)
+               tls_mode = TLS_MODE_FAILED;
+       if (tls_mode == TLS_MODE_FAILED)
+               return;
        if ((monodomain_key == -1) && (tls_mode == TLS_MODE_NPTL)) {
                monodomain_key = mono_domain_get_tls_offset();
        }
@@ -5375,24 +5716,7 @@ setup_tls_access (void)
                        lmf_pthread_key = ptk;
                }
        }
-
-       if ((monothread_key == -1) && (tls_mode == TLS_MODE_NPTL)) {
-               monothread_key = mono_thread_get_tls_offset();
-       }
-       /* if not TLS_MODE_NPTL or local dynamic (as indicated by
-          mono_get_lmf_addr_tls_offset returning -1) then use keyed access. */
-       if (monothread_key == -1) {
-               ptk = mono_thread_get_tls_key ();
-               if (ptk < 1024) {
-                       ptk = mono_pthread_key_for_tls (ptk);
-                       if (ptk < 1024) {
-                               monothread_key = ptk;
-                               /*g_print ("thread inited: %d\n", ptk);*/
-                       }
-               } else {
-                       /*g_print ("thread not inited yet %d\n", ptk);*/
-               }
-       }
+#endif
 }
 
 void
@@ -5486,7 +5810,7 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                                item->jmp_code = code;
                                ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
                                if (item->has_target_code) {
-                                       ppc_load (code, ppc_r0, item->value.target_code);
+                                       ppc_load_ptr (code, ppc_r0, item->value.target_code);
                                } else {
                                        ppc_ldptr (code, ppc_r0, (sizeof (gpointer) * item->value.vtable_slot), ppc_r11);
                                        ppc_ldptr (code, ppc_r11, PPC_RET_ADDR_OFFSET, ppc_sp);
@@ -5500,16 +5824,16 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                                        item->jmp_code = code;
                                        ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
                                        if (item->has_target_code) {
-                                               ppc_load (code, ppc_r0, item->value.target_code);
+                                               ppc_load_ptr (code, ppc_r0, item->value.target_code);
                                        } else {
                                                g_assert (vtable);
-                                               ppc_load (code, ppc_r0, & (vtable->vtable [item->value.vtable_slot]));
+                                               ppc_load_ptr (code, ppc_r0, & (vtable->vtable [item->value.vtable_slot]));
                                                ppc_ldptr_indexed (code, ppc_r0, 0, ppc_r0);
                                        }
                                        ppc_mtctr (code, ppc_r0);
                                        ppc_bcctr (code, PPC_BR_ALWAYS, 0);
                                        ppc_patch (item->jmp_code, code);
-                                       ppc_load (code, ppc_r0, fail_tramp);
+                                       ppc_load_ptr (code, ppc_r0, fail_tramp);
                                        ppc_mtctr (code, ppc_r0);
                                        ppc_bcctr (code, PPC_BR_ALWAYS, 0);
                                        item->jmp_code = NULL;
@@ -5557,22 +5881,20 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
 }
 
 MonoMethod*
-mono_arch_find_imt_method (gpointer *regs, guint8 *code)
+mono_arch_find_imt_method (mgreg_t *regs, guint8 *code)
 {
-       return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
-}
+       mgreg_t *r = (mgreg_t*)regs;
 
-MonoObject*
-mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSharingContext *gsctx)
-{
-       return mono_arch_get_this_arg_from_call (gsctx, mono_method_signature (method), (gssize*)regs, NULL);
+       return (MonoMethod*)(gsize) r [MONO_ARCH_IMT_REG];
 }
 #endif
 
 MonoVTable*
-mono_arch_find_static_call_vtable (gpointer *regs, guint8 *code)
+mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code)
 {
-       return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
+       mgreg_t *r = (mgreg_t*)regs;
+
+       return (MonoVTable*)(gsize) r [MONO_ARCH_RGCTX_REG];
 }
 
 MonoInst*
@@ -5601,20 +5923,6 @@ MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
        return ins;
 }
 
-MonoInst* 
-mono_arch_get_thread_intrinsic (MonoCompile* cfg)
-{
-       MonoInst* ins;
-
-       setup_tls_access ();
-       if (monothread_key == -1)
-               return NULL;
-       
-       MONO_INST_NEW (cfg, ins, OP_TLS_GET);
-       ins->inst_offset = monothread_key;
-       return ins;
-}
-
 gpointer
 mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
 {
@@ -5623,7 +5931,7 @@ mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
 
        g_assert (reg >= ppc_r13);
 
-       return (gpointer)ctx->regs [reg - ppc_r13];
+       return (gpointer)(gsize)ctx->regs [reg - ppc_r13];
 }
 
 guint32
@@ -5681,3 +5989,172 @@ mono_arch_emit_load_aotconst (guint8 *start, guint8 *code, MonoJumpInfo **ji, in
 
        return code;
 }
+
+/* Soft Debug support */
+#ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED
+
+/*
+ * BREAKPOINTS
+ */
+
+/*
+ * mono_arch_set_breakpoint:
+ *   Write at IP a load sequence putting bp_trigger_page's address into r11, followed by a
+ *   pointer load through r11, then flush the icache. JI is unused. See mini-amd64.c for docs.
+ */
+void
+mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
+{
+       guint8 *code = ip;
+       guint8 *orig_code = code;
+
+       ppc_load_sequence (code, ppc_r11, (gsize)bp_trigger_page);
+       ppc_ldptr (code, ppc_r11, 0, ppc_r11);
+
+       g_assert (code - orig_code == BREAKPOINT_SIZE); /* BREAKPOINT_SIZE is PPC_LOAD_SEQUENCE_LENGTH + 4 */
+
+       mono_arch_flush_icache (orig_code, code - orig_code);
+}
+
+/*
+ * mono_arch_clear_breakpoint:
+ *   Emit BREAKPOINT_SIZE / 4 nops starting at IP, then flush the icache for the
+ *   rewritten range. JI is unused. See mini-amd64.c for docs.
+ */
+void
+mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
+{
+       guint8 *code = ip;
+       int i;
+
+       for (i = 0; i < BREAKPOINT_SIZE / 4; ++i)
+               ppc_nop (code);
+
+       mono_arch_flush_icache (ip, code - ip); /* relies on ppc_nop () having advanced code */
+}
+
+/*
+ * mono_arch_is_breakpoint_event:
+ *   Return TRUE if si_addr of INFO (treated as a siginfo_t*) lies within
+ *   [bp_trigger_page, bp_trigger_page + 128]. SIGCTX is unused. See mini-amd64.c for docs.
+ */
+gboolean
+mono_arch_is_breakpoint_event (void *info, void *sigctx)
+{
+       siginfo_t* sinfo = (siginfo_t*) info;
+       /* Sometimes the address is off by 4; the accepted window is 128 bytes, both ends inclusive */
+       if (sinfo->si_addr >= bp_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)bp_trigger_page + 128)
+               return TRUE;
+       else
+               return FALSE;
+}
+
+/*
+ * mono_arch_get_ip_for_breakpoint:
+ *   Return the IP stored in CTX moved back by PPC_LOAD_SEQUENCE_LENGTH bytes.
+ *   JI is unused. See mini-amd64.c for docs.
+ */
+guint8*
+mono_arch_get_ip_for_breakpoint (MonoJitInfo *ji, MonoContext *ctx)
+{
+       guint8 *ip = MONO_CONTEXT_GET_IP (ctx);
+
+       /* ip points at the ldptr instruction, which mono_arch_set_breakpoint () emits right after the load sequence */
+       ip -= PPC_LOAD_SEQUENCE_LENGTH;
+
+       return ip;
+}
+
+/*
+ * mono_arch_skip_breakpoint:
+ *   Advance the IP stored in CTX by 4 bytes (BREAKPOINT_SIZE counts the ldptr as 4 bytes).
+ *   See mini-amd64.c for docs.
+ */
+void
+mono_arch_skip_breakpoint (MonoContext *ctx)
+{
+       /* skip the ldptr */
+       MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + 4);
+}
+
+/*
+ * SINGLE STEPPING
+ */
+       
+/*
+ * mono_arch_start_single_stepping:
+ *   Set the protection of the mono_pagesize () bytes at ss_trigger_page to 0.
+ *   The mono_mprotect () result is not checked. See mini-amd64.c for docs.
+ */
+void
+mono_arch_start_single_stepping (void)
+{
+       mono_mprotect (ss_trigger_page, mono_pagesize (), 0); /* NOTE(review): the ss_trigger_page comment says "read-only", but MONO_MMAP_READ is not set here - confirm */
+}
+       
+/*
+ * mono_arch_stop_single_stepping:
+ *   Set the protection of the mono_pagesize () bytes at ss_trigger_page to MONO_MMAP_READ.
+ *   The mono_mprotect () result is not checked. See mini-amd64.c for docs.
+ */
+void
+mono_arch_stop_single_stepping (void)
+{
+       mono_mprotect (ss_trigger_page, mono_pagesize (), MONO_MMAP_READ);
+}
+
+/*
+ * mono_arch_is_single_step_event:
+ *   Return TRUE if si_addr of INFO (treated as a siginfo_t*) lies within
+ *   [ss_trigger_page, ss_trigger_page + 128]. SIGCTX is unused. See mini-amd64.c for docs.
+ */
+gboolean
+mono_arch_is_single_step_event (void *info, void *sigctx)
+{
+       siginfo_t* sinfo = (siginfo_t*) info;
+       /* Sometimes the address is off by 4; the accepted window is 128 bytes, both ends inclusive */
+       if (sinfo->si_addr >= ss_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)ss_trigger_page + 128)
+               return TRUE;
+       else
+               return FALSE;
+}
+
+/*
+ * mono_arch_get_ip_for_single_step:
+ *   Return the IP stored in CTX unchanged. JI is unused. See mini-amd64.c for docs.
+ *   NOTE(review): "after the ldptr" below vs. mono_arch_skip_single_step () adding 4 to skip it - confirm.
+ */
+guint8*
+mono_arch_get_ip_for_single_step (MonoJitInfo *ji, MonoContext *ctx)
+{
+       guint8 *ip = MONO_CONTEXT_GET_IP (ctx);
+
+       /* ip points after the ldptr instruction */
+       return ip;
+}
+
+/*
+ * mono_arch_skip_single_step:
+ *   Advance the IP stored in CTX by 4 bytes, i.e. past one ldptr instruction.
+ *   See mini-amd64.c for docs.
+ */
+void
+mono_arch_skip_single_step (MonoContext *ctx)
+{
+       /* skip the ldptr */
+       MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + 4);
+}
+
+/*
+ * mono_arch_get_seq_point_info:
+ *   Not implemented here: invokes NOT_IMPLEMENTED, followed by a NULL return.
+ *   DOMAIN and CODE are unused. See mini-amd64.c for docs.
+ */
+gpointer
+mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
+{
+       NOT_IMPLEMENTED;
+       return NULL;
+}
+
+#endif