2008-12-05 Mark Probst <mark.probst@gmail.com>
[mono.git] / mono / mini / mini-ppc64.c
index 01a2f4eb7249740c5c444d356b084cb2184d759f..78fe45ab4f28bd0d07a89a5bc0cbdeff7dbd2f39 100644 (file)
@@ -23,6 +23,8 @@
 #include <sys/sysctl.h>
 #endif
 
+//#define DEBUG_PATCHING
+
 #define FORCE_INDIR_CALL 1
 
 enum {
@@ -262,10 +264,10 @@ mono_arch_get_vcall_slot (guint8 *code_ptr, gpointer *regs, int *displacement)
                        reg = (*code >> 16) & 0x1f;
                        g_assert (reg != ppc_r1);
                        /*g_print ("patching reg is %d\n", reg);*/
-                       if (reg >= MONO_FIRST_SAVED_GREG) {
-                               MonoLMF *lmf = (MonoLMF*)((char*)regs + (MONO_FIRST_SAVED_FREG * sizeof (double)) + (MONO_FIRST_SAVED_GREG * sizeof (gulong)));
+                       if (reg >= 13) {
+                               MonoLMF *lmf = (MonoLMF*)((char*)regs + (14 * sizeof (double)) + (13 * sizeof (gulong)));
                                /* saved in the MonoLMF structure */
-                               o = (gpointer)lmf->iregs [reg - MONO_FIRST_SAVED_GREG];
+                               o = (gpointer)lmf->iregs [reg - 13];
                        } else {
                                o = regs [reg];
                        }
@@ -625,6 +627,9 @@ typedef struct {
        guint8  reg;
        guint8  regtype : 4; /* 0 general, 1 basereg, 2 floating point register, see RegType* */
        guint8  size    : 4; /* 1, 2, 4, 8, or regs used by RegTypeStructByVal */
+       guint8  bytes   : 4; /* size in bytes - only valid for
+                               pinvoke RegTypeStructByVal args whose struct
+                               fits in one word, otherwise it's 0 */
 } ArgInfo;
 
 typedef struct {
@@ -655,7 +660,6 @@ add_general (guint *gr, guint *stack_size, ArgInfo *ainfo, gboolean simple)
        (*gr) ++;
 }
 
-#if __APPLE__
 static gboolean
 has_only_a_r48_field (MonoClass *klass)
 {
@@ -675,7 +679,6 @@ has_only_a_r48_field (MonoClass *klass)
        }
        return have_field;
 }
-#endif
 
 static CallInfo*
 calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
@@ -768,7 +771,6 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                            size = mono_class_native_size (klass, NULL);
                        else
                            size = mono_class_value_size (klass, NULL);
-#if __APPLE__
                        if ((size == 4 || size == 8) && has_only_a_r48_field (klass)) {
                                cinfo->args [n].size = size;
 
@@ -790,7 +792,6 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                                n++;
                                break;
                        }
-#endif
                        DEBUG(printf ("load %d bytes struct\n",
                                      mono_class_native_size (sig->params [i]->data.klass, NULL)));
 #if PPC_PASS_STRUCTS_BY_VALUE
@@ -802,9 +803,14 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                                align_size += (sizeof (gpointer) - 1);
                                align_size &= ~(sizeof (gpointer) - 1);
                                nwords = (align_size + sizeof (gpointer) -1 ) / sizeof (gpointer);
-                               n_in_regs = rest >= nwords? nwords: rest;
+                               n_in_regs = MIN (rest, nwords);
                                cinfo->args [n].regtype = RegTypeStructByVal;
-                               if (gr > PPC_LAST_ARG_REG || (size >= 3 && size % 4 != 0)) {
+                               if (gr > PPC_LAST_ARG_REG
+#ifdef __APPLE__
+                                               /* FIXME: check this */
+                                               || (size >= 3 && size % 4 != 0)
+#endif
+                                               ) {
                                        cinfo->args [n].size = 0;
                                        cinfo->args [n].vtsize = nwords;
                                } else {
@@ -812,6 +818,10 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                                        cinfo->args [n].vtsize = nwords - n_in_regs;
                                        cinfo->args [n].reg = gr;
                                }
+                               if (nwords == 1 && is_pinvoke)
+                                       cinfo->args [n].bytes = size;
+                               else
+                                       cinfo->args [n].bytes = 0;
                                gr += n_in_regs;
                                cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
                                /*g_print ("offset for arg %d at %d\n", n, PPC_STACK_PARAM_OFFSET + stack_size);*/
@@ -843,6 +853,10 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                                        cinfo->args [n].size = 0;
                                        cinfo->args [n].vtsize = nwords;
                                }
+                               if (nwords == 1 && is_pinvoke)
+                                       cinfo->args [n].bytes = size;
+                               else
+                                       cinfo->args [n].bytes = 0;
                                cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
                                /*g_print ("offset for arg %d at %d\n", n, PPC_STACK_PARAM_OFFSET + stack_size);*/
                                stack_size += nwords * sizeof (gpointer);
@@ -870,12 +884,12 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                                cinfo->args [n].reg = fr;
                                fr ++;
                                FP_ALSO_IN_REG (gr ++);
-                               ALWAYS_ON_STACK (stack_size += 4);
+                               ALWAYS_ON_STACK (stack_size += 8);
                        } else {
-                               cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
+                               cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size + 4;
                                cinfo->args [n].regtype = RegTypeBase;
                                cinfo->args [n].reg = ppc_sp; /* in the caller*/
-                               stack_size += 4;
+                               stack_size += 8;
                        }
                        n++;
                        break;
@@ -886,7 +900,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke)
                                cinfo->args [n].regtype = RegTypeFP;
                                cinfo->args [n].reg = fr;
                                fr ++;
-                               FP_ALSO_IN_REG (gr += 2);
+                               FP_ALSO_IN_REG (gr++);
                                ALWAYS_ON_STACK (stack_size += 8);
                        } else {
                                cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
@@ -1101,9 +1115,6 @@ mono_arch_allocate_vars (MonoCompile *m)
        /* this is a global constant */
        mono_exc_esp_offset = offset;
 #endif
-       if (sig->call_convention == MONO_CALL_VARARG) {
-                m->sig_cookie = PPC_STACK_PARAM_OFFSET;
-        }
 
        if (MONO_TYPE_ISSTRUCT (sig->ret)) {
                offset += sizeof(gpointer) - 1;
@@ -1119,8 +1130,6 @@ mono_arch_allocate_vars (MonoCompile *m)
                }
 
                offset += sizeof(gpointer);
-               if (sig->call_convention == MONO_CALL_VARARG)
-                       m->sig_cookie += sizeof (gpointer);
        }
 
        offsets = mono_allocate_stack_slots_full (m, FALSE, &locals_stack_size, &locals_stack_align);
@@ -1152,8 +1161,6 @@ mono_arch_allocate_vars (MonoCompile *m)
                        offset &= ~(sizeof (gpointer) - 1);
                        inst->inst_offset = offset;
                        offset += sizeof (gpointer);
-                       if (sig->call_convention == MONO_CALL_VARARG)
-                               m->sig_cookie += sizeof (gpointer);
                }
                curinst++;
        }
@@ -1169,12 +1176,12 @@ mono_arch_allocate_vars (MonoCompile *m)
                        } else {
                                size = mono_type_size (sig->params [i], &align);
                        }
+                       if (MONO_TYPE_ISSTRUCT (sig->params [i]) && size < sizeof (gpointer))
+                               size = align = sizeof (gpointer);
                        offset += align - 1;
                        offset &= ~(align - 1);
                        inst->inst_offset = offset;
                        offset += size;
-                       if ((sig->call_convention == MONO_CALL_VARARG) && (i < sig->sentinelpos)) 
-                               m->sig_cookie += size;
                }
                curinst++;
        }
@@ -1374,8 +1381,15 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
                } else
 #endif
                        for (i = 0; i < ainfo->size; ++i) {
+                               int antipadding = 0;
+                               if (ainfo->bytes) {
+                                       g_assert (i == 0);
+                                       antipadding = sizeof (gpointer) - ainfo->bytes;
+                               }
                                dreg = mono_alloc_ireg (cfg);
                                MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, soffset);
+                               if (antipadding)
+                                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHR_UN_IMM, dreg, dreg, antipadding * 8);
                                mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg + i, FALSE);
                                soffset += sizeof (gpointer);
                        }
@@ -1653,12 +1667,12 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                case OP_LOAD_MEMBASE:
-               case OP_LOADI4_MEMBASE:
+               case OP_LOADI8_MEMBASE:
                        /* 
                         * OP_STORE_MEMBASE_REG reg, offset(basereg) 
                         * OP_LOAD_MEMBASE offset(basereg), reg
                         */
-                       if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
+                       if (last_ins && (last_ins->opcode == OP_STOREI8_MEMBASE_REG
                                         || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
                            ins->inst_basereg == last_ins->inst_destbasereg &&
                            ins->inst_offset == last_ins->inst_offset) {
@@ -1679,7 +1693,7 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
                         * OP_LOAD_MEMBASE offset(basereg), reg1
                         * OP_MOVE reg1, reg2
                         */
-                       } else if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
+                       } else if (last_ins && (last_ins->opcode == OP_LOADI8_MEMBASE
                                           || last_ins->opcode == OP_LOAD_MEMBASE) &&
                              ins->inst_basereg != last_ins->dreg &&
                              ins->inst_basereg == last_ins->inst_basereg &&
@@ -1703,7 +1717,7 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
                         * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
                         * OP_ICONST reg, imm
                         */
-                       } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
+                       } else if (last_ins && (last_ins->opcode == OP_STOREI8_MEMBASE_IMM
                                                || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
                                   ins->inst_basereg == last_ins->inst_destbasereg &&
                                   ins->inst_offset == last_ins->inst_offset) {
@@ -1732,6 +1746,15 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
                                ins->sreg1 = last_ins->sreg1;                           
                        }
                        break;
+               case OP_LOADU4_MEMBASE:
+               case OP_LOADI4_MEMBASE:
+                       if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
+                                       ins->inst_basereg == last_ins->inst_destbasereg &&
+                                       ins->inst_offset == last_ins->inst_offset) {
+                               ins->opcode = (ins->opcode == OP_LOADI4_MEMBASE) ? OP_ICONV_TO_I4 : OP_ICONV_TO_U4;
+                               ins->sreg1 = last_ins->sreg1;
+                       }
+                       break;
                case OP_MOVE:
                        ins->opcode = OP_MOVE;
                        /* 
@@ -1783,32 +1806,6 @@ mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
                ins->opcode = OP_NOP;
                break;
        }
-       case OP_ICONV_TO_R4:
-       case OP_ICONV_TO_R8: {
-               /* FIXME: change precision for CEE_CONV_R4 */
-               static const guint64 adjust_val = 0x4330000080000000ULL;
-               int msw_reg = mono_alloc_ireg (cfg);
-               int xored = mono_alloc_ireg (cfg);
-               int adj_reg = mono_alloc_freg (cfg);
-               int tmp_reg = mono_alloc_freg (cfg);
-               int basereg = ppc_sp;
-               int offset = -8;
-               if (!ppc_is_imm16 (offset + 4)) {
-                       basereg = mono_alloc_ireg (cfg);
-                       MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IADD_IMM, basereg, cfg->frame_reg, offset);
-               }
-               MONO_EMIT_NEW_ICONST (cfg, msw_reg, 0x43300000);
-               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset, msw_reg);
-               MONO_EMIT_NEW_BIALU_IMM (cfg, OP_XOR_IMM, xored, ins->sreg1, 0x80000000);
-               MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset + 4, xored);
-               MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, (gpointer)&adjust_val);
-               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, basereg, offset);
-               MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg);
-               if (ins->opcode == OP_ICONV_TO_R4)
-                       MONO_EMIT_NEW_UNALU (cfg, OP_FCONV_TO_R4, ins->dreg, ins->dreg);
-               ins->opcode = OP_NOP;
-               break;
-       }
        case OP_CKFINITE: {
                int msw_reg = mono_alloc_ireg (cfg);
                int basereg = ppc_sp;
@@ -2016,26 +2013,35 @@ loop_start:
                        /* handle rem separately */
                        goto loop_start;
                case OP_IREM:
-               case OP_IREM_UN: {
+               case OP_IREM_UN:
+               case OP_LREM:
+               case OP_LREM_UN: {
                        MonoInst *mul;
                        /* we change a rem dest, src1, src2 to
                         * div temp1, src1, src2
                         * mul temp2, temp1, src2
                         * sub dest, src1, temp2
                         */
-                       NEW_INS (cfg, mul, OP_IMUL);
-                       NEW_INS (cfg, temp, ins->opcode == OP_IREM? OP_IDIV: OP_IDIV_UN);
+                       if (ins->opcode == OP_IREM || ins->opcode == OP_IREM_UN) {
+                               NEW_INS (cfg, mul, OP_IMUL);
+                               NEW_INS (cfg, temp, ins->opcode == OP_IREM? OP_IDIV: OP_IDIV_UN);
+                               ins->opcode = OP_ISUB;
+                       } else {
+                               NEW_INS (cfg, mul, OP_LMUL);
+                               NEW_INS (cfg, temp, ins->opcode == OP_LREM? OP_LDIV: OP_LDIV_UN);
+                               ins->opcode = OP_LSUB;
+                       }
                        temp->sreg1 = ins->sreg1;
                        temp->sreg2 = ins->sreg2;
                        temp->dreg = mono_alloc_ireg (cfg);
                        mul->sreg1 = temp->dreg;
                        mul->sreg2 = ins->sreg2;
                        mul->dreg = mono_alloc_ireg (cfg);
-                       ins->opcode = OP_ISUB;
                        ins->sreg2 = mul->dreg;
                        break;
                }
                case OP_IADD_IMM:
+               case OP_LADD_IMM:
                case OP_ADD_IMM:
                case OP_ADDCC_IMM:
                        if (!ppc_is_imm16 (ins->inst_imm)) {
@@ -2047,6 +2053,7 @@ loop_start:
                        }
                        break;
                case OP_ISUB_IMM:
+               case OP_LSUB_IMM:
                case OP_SUB_IMM:
                        if (!ppc_is_imm16 (-ins->inst_imm)) {
                                NEW_INS (cfg, temp, OP_ICONST);
@@ -2065,7 +2072,8 @@ loop_start:
                case OP_LAND_IMM:
                case OP_LOR_IMM:
                case OP_LXOR_IMM:
-                       if ((ins->inst_imm & ~0xffffUL) && (ins->inst_imm & 0xffff)) {
+                       if ((ins->inst_imm & 0xffffffff00000000UL) ||
+                                       ((ins->inst_imm & 0xffff0000) && (ins->inst_imm & 0xffff))) {
                                NEW_INS (cfg, temp, OP_ICONST);
                                temp->inst_c0 = ins->inst_imm;
                                temp->dreg = mono_alloc_ireg (cfg);
@@ -2210,20 +2218,29 @@ static guchar*
 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed)
 {
        int offset = cfg->arch.fp_conv_var_offset;
+       int sub_offset;
        /* sreg is a float, dreg is an integer reg. ppc_f0 is used a scratch */
        if (size == 8) {
                ppc_fctidz (code, ppc_f0, sreg);
+               sub_offset = 0;
        } else {
                ppc_fctiwz (code, ppc_f0, sreg);
+               sub_offset = 4;
        }
-       if (ppc_is_imm16 (offset + 4)) {
+       if (ppc_is_imm16 (offset + sub_offset)) {
                ppc_stfd (code, ppc_f0, offset, cfg->frame_reg);
-               ppc_lwz (code, dreg, offset + 4, cfg->frame_reg);
+               if (size == 8)
+                       ppc_load_reg (code, dreg, offset + sub_offset, cfg->frame_reg);
+               else
+                       ppc_lwz (code, dreg, offset + sub_offset, cfg->frame_reg);
        } else {
                ppc_load (code, dreg, offset);
                ppc_add (code, dreg, dreg, cfg->frame_reg);
                ppc_stfd (code, ppc_f0, 0, dreg);
-               ppc_lwz (code, dreg, 4, dreg);
+               if (size == 8)
+                       ppc_load_reg (code, dreg, sub_offset, dreg);
+               else
+                       ppc_lwz (code, dreg, sub_offset, dreg);
        }
        if (!is_signed) {
                if (size == 1)
@@ -2356,7 +2373,9 @@ ppc_patch_full (guchar *code, const guchar *target, gboolean is_fd)
        guint32 prim = ppc_opcode (ins);
        guint32 ovf;
 
-       //g_print ("patching %p (0x%08x) to point to %p\n", code, ins, target);
+#ifdef DEBUG_PATCHING
+       g_print ("patching %p (0x%08x) to point to %p\n", code, ins, target);
+#endif
        if (prim == 18) {
                // prefer relative branches, they are more position independent (e.g. for AOT compilation).
                gint diff = target - code;
@@ -2592,8 +2611,12 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
                        } else
 #endif
                                for (j = 0; j < ainfo->size; ++j) {
-                                       ppc_load_reg (code, ainfo->reg  + j,
-                                               inst->inst_offset + j * sizeof (gpointer), inst->inst_basereg);
+                                       ppc_load_reg (code, ainfo->reg + j,
+                                                       inst->inst_offset + j * sizeof (gpointer),
+                                                       inst->inst_basereg);
+                                       /* FIXME: sub-word structs (ainfo->bytes != 0) need a right shift after the load */
+                                       if (ainfo->bytes)
+                                               NOT_IMPLEMENTED;
                                }
                        break;
                }
@@ -2812,8 +2835,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_store_reg (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg);
                        } else {
-                               /* FIXME: implement */
-                               g_assert_not_reached ();
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_store_reg_indexed (code, ins->sreg1, ins->inst_destbasereg, ppc_r0);
                        }
                        break;
                case OP_STOREI1_MEMINDEX:
@@ -2837,7 +2860,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        if (ppc_is_imm16 (ins->inst_offset)) {
                                ppc_load_reg (code, ins->dreg, ins->inst_offset, ins->inst_basereg);
                        } else {
-                               g_assert_not_reached ();
+                               ppc_load (code, ppc_r0, ins->inst_offset);
+                               ppc_load_reg_indexed (code, ins->dreg, ins->inst_basereg, ppc_r0);
                        }
                        break;
                case OP_LOADI4_MEMBASE:
@@ -2925,6 +2949,24 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_ZEXT_I4:
                        ppc_clrldi (code, ins->dreg, ins->sreg1, 32);
                        break;
+               case OP_ICONV_TO_R4:
+               case OP_ICONV_TO_R8:
+               case OP_LCONV_TO_R4:
+               case OP_LCONV_TO_R8: {
+                       int tmp;
+                       if (ins->opcode == OP_ICONV_TO_R4 || ins->opcode == OP_ICONV_TO_R8) {
+                               ppc_extsw (code, ppc_r0, ins->sreg1);
+                               tmp = ppc_r0;
+                       } else {
+                               tmp = ins->sreg1;
+                       }
+                       ppc_store_reg (code, tmp, -8, ppc_r1);
+                       ppc_lfd (code, ins->dreg, -8, ppc_r1);
+                       ppc_fcfid (code, ins->dreg, ins->dreg);
+                       if (ins->opcode == OP_ICONV_TO_R4 || ins->opcode == OP_LCONV_TO_R4)
+                               ppc_frsp (code, ins->dreg, ins->dreg);
+                       break;
+               }
                case OP_COMPARE:
                case OP_ICOMPARE:
                case OP_LCOMPARE:
@@ -3096,7 +3138,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                g_assert_not_reached ();
                        }
                        break;
-               case OP_IDIV: {
+               case OP_IDIV:
+               case OP_LDIV: {
                        guint8 *divisor_is_m1;
                          /* XER format: SO, OV, CA, reserved [21 bits], count [8 bits]
                          */
@@ -3104,27 +3147,28 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        divisor_is_m1 = code;
                        ppc_bc (code, PPC_BR_FALSE | PPC_BR_LIKELY, PPC_BR_EQ, 0);
                        ppc_lis (code, ppc_r0, 0x8000);
+                       if (ins->opcode == OP_LDIV)
+                               ppc_sldi (code, ppc_r0, ppc_r0, 32);
                        ppc_cmp (code, 0, 1, ins->sreg1, ppc_r0);
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_EQ, "ArithmeticException");
                        ppc_patch (divisor_is_m1, code);
                         /* XER format: SO, OV, CA, reserved [21 bits], count [8 bits]
                         */
-                       ppc_divwod (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       if (ins->opcode == OP_IDIV)
+                               ppc_divwod (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       else
+                               ppc_divdod (code, ins->dreg, ins->sreg1, ins->sreg2);
                        ppc_mfspr (code, ppc_r0, ppc_xer);
                        ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
                        break;
                }
-               case OP_LDIV:
-                       ppc_divd (code, ins->dreg, ins->sreg1, ins->sreg2);
-                       /* FIXME: div by zero check */
-                       break;
-               case OP_LDIV_UN:
-                       ppc_divdu (code, ins->dreg, ins->sreg1, ins->sreg2);
-                       /* FIXME: div by zero check */
-                       break;
                case OP_IDIV_UN:
-                       ppc_divwuod (code, ins->dreg, ins->sreg1, ins->sreg2);
+               case OP_LDIV_UN:
+                       if (ins->opcode == OP_IDIV_UN)
+                               ppc_divwuod (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       else
+                               ppc_divduod (code, ins->dreg, ins->sreg1, ins->sreg2);
                        ppc_mfspr (code, ppc_r0, ppc_xer);
                        ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException");
@@ -3188,10 +3232,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_SHR_UN_IMM:
                case OP_LSHR_UN_IMM:
-                       ppc_srdi (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x3f));
+                       if (ins->inst_imm & 0x3f)
+                               ppc_srdi (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x3f));
+                       else
+                               ppc_mr (code, ins->dreg, ins->sreg1);
                        break;
                case OP_ISHR_UN_IMM:
-                       ppc_srwi (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
+                       if (ins->inst_imm & 0x1f)
+                               ppc_srwi (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
+                       else
+                               ppc_mr (code, ins->dreg, ins->sreg1);
                        break;
                case OP_ISHR_UN:
                        ppc_srw (code, ins->dreg, ins->sreg1, ins->sreg2);
@@ -3221,20 +3271,28 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                case OP_IMUL_OVF:
+               case OP_LMUL_OVF:
                        /* we annot use mcrxr, since it's not implemented on some processors 
                         * XER format: SO, OV, CA, reserved [21 bits], count [8 bits]
                         */
-                       ppc_mullwo (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       if (ins->opcode == OP_IMUL_OVF)
+                               ppc_mullwo (code, ins->dreg, ins->sreg1, ins->sreg2);
+                       else
+                               ppc_mulldo (code, ins->dreg, ins->sreg1, ins->sreg2);
                        ppc_mfspr (code, ppc_r0, ppc_xer);
                        ppc_andisd (code, ppc_r0, ppc_r0, (1<<14));
                        EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException");
                        break;
                case OP_IMUL_OVF_UN:
+               case OP_LMUL_OVF_UN:
                        /* we first multiply to get the high word and compare to 0
                         * to set the flags, then the result is discarded and then 
                         * we multiply to get the lower * bits result
                         */
-                       ppc_mulhwu (code, ppc_r0, ins->sreg1, ins->sreg2);
+                       if (ins->opcode == OP_IMUL_OVF_UN)
+                               ppc_mulhwu (code, ppc_r0, ins->sreg1, ins->sreg2);
+                       else
+                               ppc_mulhdu (code, ppc_r0, ins->sreg1, ins->sreg2);
                        ppc_cmpi (code, 0, 0, ppc_r0, 0);
                        EMIT_COND_SYSTEM_EXCEPTION (CEE_BNE_UN - CEE_BEQ, "OverflowException");
                        ppc_mulld (code, ins->dreg, ins->sreg1, ins->sreg2);
@@ -3305,7 +3363,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                        }
                                }*/
                                /* FIXME: restore registers before changing ppc_sp */
-                               for (i = MONO_LAST_SAVED_GREG; i >= MONO_FIRST_SAVED_GREG; --i) {
+                               for (i = 31; i >= 13; --i) {
                                        if (cfg->used_int_regs & (1 << i)) {
                                                pos += sizeof (gulong);
                                                ppc_load_reg_indexed (code, i, -pos, ppc_sp);
@@ -3825,6 +3883,26 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ppc_load_sequence (code, ins->dreg, 0x0f0f0f0f0f0f0f0fL);
                        break;
                }
+               case OP_ATOMIC_ADD_NEW_I4:
+               case OP_ATOMIC_ADD_NEW_I8: {
+                       guint8 *loop = code, *branch;
+                       g_assert (ins->inst_offset == 0);
+                       if (ins->opcode == OP_ATOMIC_ADD_NEW_I4)
+                               ppc_lwarx (code, ppc_r0, 0, ins->inst_basereg);
+                       else
+                               ppc_ldarx (code, ppc_r0, 0, ins->inst_basereg);
+                       ppc_add (code, ppc_r0, ppc_r0, ins->sreg2);
+                       if (ins->opcode == OP_ATOMIC_ADD_NEW_I4)
+                               ppc_stwcxd (code, ppc_r0, 0, ins->inst_basereg);
+                       else
+                               ppc_stdcxd (code, ppc_r0, 0, ins->inst_basereg);
+                       branch = code;
+                       ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0);
+                       ppc_patch (branch, loop);
+                       ppc_mr (code, ins->dreg, ppc_r0);
+                       break;
+               }
+
                default:
                        g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
                        g_assert_not_reached ();
@@ -3875,7 +3953,9 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
 
                target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
 
-               //g_print ("patching %p to %p (type %d)\n", ip, target, patch_info->type);
+#ifdef DEBUG_PATCHING
+               g_print ("patching %p to %p (type %d)\n", ip, target, patch_info->type);
+#endif
 
                switch (patch_info->type) {
                case MONO_PATCH_INFO_IP:
@@ -3927,6 +4007,7 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
                case MONO_PATCH_INFO_INTERNAL_METHOD:
                case MONO_PATCH_INFO_ABS:
                case MONO_PATCH_INFO_CLASS_INIT:
+               case MONO_PATCH_INFO_RGCTX_FETCH:
                        is_fd = TRUE;
                        break;
                default:
@@ -3990,7 +4071,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                ppc_stfd (code, i, -pos, ppc_sp);
                        }
                }*/
-               for (i = MONO_LAST_SAVED_GREG; i >= MONO_FIRST_SAVED_GREG; --i) {
+               for (i = 31; i >= 13; --i) {
                        if (cfg->used_int_regs & (1 << i)) {
                                pos += sizeof (gulong);
                                ppc_store_reg (code, i, -pos, ppc_sp);
@@ -3999,13 +4080,13 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        } else {
                pos += sizeof (MonoLMF);
                lmf_offset = pos;
-               for (i = MONO_FIRST_SAVED_GREG; i <= MONO_LAST_SAVED_GREG; i++) {
+               for (i = 13; i <= 31; i++) {
                        ppc_store_reg (code, i, (-pos + G_STRUCT_OFFSET(MonoLMF, iregs) +
-                               ((i-MONO_FIRST_SAVED_GREG) * sizeof (gulong))), ppc_r1);
+                               ((i-13) * sizeof (gulong))), ppc_r1);
                }
-               for (i = MONO_FIRST_SAVED_FREG; i <= MONO_LAST_SAVED_FREG; i++) {
+               for (i = 14; i <= 31; i++) {
                        ppc_stfd (code, i, (-pos + G_STRUCT_OFFSET(MonoLMF, fregs) +
-                               ((i-MONO_FIRST_SAVED_FREG) * sizeof (gdouble))), ppc_r1);
+                               ((i-14) * sizeof (gdouble))), ppc_r1);
                }
        }
        alloc_size += pos;
@@ -4029,12 +4110,14 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                ppc_mr (code, cfg->frame_reg, ppc_sp);
 
        /* store runtime generic context */
+#ifdef MONO_ARCH_RGCTX_REG
        if (cfg->rgctx_var) {
                g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET &&
                                (cfg->rgctx_var->inst_basereg == ppc_r1 || cfg->rgctx_var->inst_basereg == ppc_r31));
 
                ppc_store_reg (code, MONO_ARCH_RGCTX_REG, cfg->rgctx_var->inst_offset, cfg->rgctx_var->inst_basereg);
        }
+#endif
 
         /* compute max_offset in order to use short forward jumps
         * we always do it on ppc because the immediate displacement
@@ -4199,11 +4282,24 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                                ppc_stb (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
                                        else
 #endif
-                                               ppc_store_reg (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
+                                       {
+                                               if (ainfo->bytes) {
+                                                       g_assert (cur_reg == 0);
+                                                       ppc_sldi (code, ppc_r0, ainfo->reg,
+                                                                       (sizeof (gpointer) - ainfo->bytes) * 8);
+                                                       ppc_store_reg (code, ppc_r0, doffset, inst->inst_basereg);
+                                               } else {
+                                                       ppc_store_reg (code, ainfo->reg + cur_reg, doffset,
+                                                                       inst->inst_basereg);
+                                               }
+                                       }
                                        soffset += sizeof (gpointer);
                                        doffset += sizeof (gpointer);
                                }
                                if (ainfo->vtsize) {
+                                       /* FIXME: we need to do the shifting here, too */
+                                       if (ainfo->bytes)
+                                               NOT_IMPLEMENTED;
                                        /* load the previous stack pointer in r11 (r0 gets overwritten by the memcpy) */
                                        ppc_load_reg (code, ppc_r11, 0, ppc_sp);
                                        if ((size & 7) != 0) {
@@ -4357,9 +4453,9 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                 * we didn't actually change them (idea from Zoltan).
                 */
                /* restore iregs */
-               for (i = MONO_FIRST_SAVED_GREG; i <= MONO_LAST_SAVED_FREG; ++i) {
+               for (i = 13; i <= 31; ++i) {
                        ppc_load_reg (code, i, G_STRUCT_OFFSET (MonoLMF, iregs) +
-                               (i - MONO_FIRST_SAVED_GREG) * sizeof (gulong), ppc_r11);
+                               (i - 13) * sizeof (gulong), ppc_r11);
                }
                /* restore fregs */
                /*for (i = 14; i < 32; i++) {
@@ -4395,7 +4491,7 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                                ppc_lfd (code, i, -pos, ppc_sp);
                        }
                }*/
-               for (i = MONO_LAST_SAVED_GREG; i >= MONO_FIRST_SAVED_GREG; --i) {
+               for (i = 31; i >= 13; --i) {
                        if (cfg->used_int_regs & (1 << i)) {
                                pos += sizeof (gulong);
                                ppc_load_reg (code, i, -pos, ppc_sp);
@@ -4607,10 +4703,10 @@ mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
 
 #ifdef MONO_ARCH_HAVE_IMT
 
-#define CMP_SIZE 12
+#define CMP_SIZE (PPC_LOAD_SEQUENCE_LENGTH + 4)
 #define BR_SIZE 4
 #define JUMP_IMM_SIZE 12
-#define JUMP_IMM32_SIZE 16
+#define JUMP_IMM32_SIZE (PPC_LOAD_SEQUENCE_LENGTH + 8)
 #define ENABLE_WRONG_METHOD_CHECK 0
 
 /*
@@ -4654,7 +4750,7 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI
                code = mono_method_alloc_generic_virtual_thunk (domain, size);
        } else {
                /* the initial load of the vtable address */
-               size += 8;
+               size += PPC_LOAD_SEQUENCE_LENGTH;
                code = mono_code_manager_reserve (domain->code_mp, size);
        }
        start = code;
@@ -4747,11 +4843,13 @@ mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSha
 }
 #endif
 
+#ifdef MONO_ARCH_RGCTX_REG /* the function below indexes regs[] with this macro, so it is only built when it is defined */
 MonoVTable*
 mono_arch_find_static_call_vtable (gpointer *regs, guint8 *code)
 {
        return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
 }
+#endif /* MONO_ARCH_RGCTX_REG */
 
 MonoInst*
 mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
@@ -4796,9 +4894,9 @@ mono_arch_get_thread_intrinsic (MonoCompile* cfg)
 gpointer
 mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
 {
-       g_assert (reg >= MONO_FIRST_SAVED_GREG);
+       g_assert (reg >= 13); /* ctx->regs is indexed by (reg - 13), so lower register numbers have no slot */
 
-       return (gpointer)ctx->regs [reg - MONO_FIRST_SAVED_GREG];
+       return (gpointer)ctx->regs [reg - 13]; /* ctx->regs [0] corresponds to register 13 */
 }
 
 void