2005-12-12 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / mini / mini-arm.c
index ccb4b154b12d89168520cd2e6f162753397ee78f..7a100ad9d92c19c0e8c6842fdb03d03af6906899 100644 (file)
@@ -68,10 +68,42 @@ mono_arch_fregname (int reg) {
        return "unknown";
 }
 
+static guint8* /* returns the code pointer advanced past whatever was emitted */
+emit_big_add (guint8 *code, int dreg, int sreg, int imm) /* emit an add of the constant imm to sreg with the result in dreg (going by the macro argument order below) */
+{
+       int imm8, rot_amount;
+       if ((imm8 = mono_arm_is_rotated_imm8 (imm, &rot_amount)) >= 0) { /* helper accepted imm (presumably an 8-bit value plus a rotation): a single ADD with immediate is enough */
+               ARM_ADD_REG_IMM (code, dreg, sreg, imm8, rot_amount);
+               return code;
+       }
+       g_assert (dreg != sreg); /* dreg is overwritten with imm below, before sreg is read by the ADD */
+       code = mono_arm_emit_load_imm (code, dreg, imm); /* materialize imm in dreg, then add sreg to it */
+       ARM_ADD_REG_REG (code, dreg, dreg, sreg);
+       return code;
+}
+
 static guint8*
 emit_memcpy (guint8 *code, int size, int dreg, int doffset, int sreg, int soffset)
 {
-       /* FIXME: unroll for large sizes, but we need more registers */
+       /* we can use r0-r3, since this is called only for incoming args on the stack */
+       if (size > sizeof (gpointer) * 4) {
+               guint8 *start_loop;
+               code = emit_big_add (code, ARMREG_R0, sreg, soffset);
+               code = emit_big_add (code, ARMREG_R1, dreg, doffset);
+               start_loop = code = mono_arm_emit_load_imm (code, ARMREG_R2, size);
+               ARM_LDR_IMM (code, ARMREG_R3, ARMREG_R0, 0);
+               ARM_STR_IMM (code, ARMREG_R3, ARMREG_R1, 0);
+               ARM_ADD_REG_IMM8 (code, ARMREG_R0, ARMREG_R0, 4);
+               ARM_ADD_REG_IMM8 (code, ARMREG_R1, ARMREG_R1, 4);
+               ARM_SUBS_REG_IMM8 (code, ARMREG_R2, ARMREG_R2, 4);
+               ARM_B_COND (code, ARMCOND_NE, 0);
+               arm_patch (code - 4, start_loop);
+               return code;
+       }
+       g_assert (arm_is_imm12 (doffset));
+       g_assert (arm_is_imm12 (doffset + size));
+       g_assert (arm_is_imm12 (soffset));
+       g_assert (arm_is_imm12 (soffset + size));
        while (size >= 4) {
                ARM_LDR_IMM (code, ARMREG_LR, sreg, soffset);
                ARM_STR_IMM (code, ARMREG_LR, dreg, doffset);
@@ -158,7 +190,6 @@ mono_arch_cpu_optimizazions (guint32 *exclude_mask)
 {
        guint32 opts = 0;
 
-       g_assert (arm_is_imm8 (8));
        /* no arm-specific optimizations yet */
        *exclude_mask = 0;
        return opts;
@@ -228,8 +259,8 @@ mono_arch_get_global_int_regs (MonoCompile *cfg)
        regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V3));
        regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V4));
        regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V5));
-       regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V6));
-       regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V7));
+       /*regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V6));*/
+       /*regs = g_list_prepend (regs, GUINT_TO_POINTER (ARMREG_V7));*/
 
        return regs;
 }
@@ -305,15 +336,15 @@ add_general (guint *gr, guint *stack_size, ArgInfo *ainfo, gboolean simple)
                }
        } else {
                if (*gr > ARMREG_R2) {
-                       *stack_size += 7;
-                       *stack_size &= ~7;
+                       /**stack_size += 7;
+                       *stack_size &= ~7;*/
                        ainfo->offset = *stack_size;
                        ainfo->reg = ARMREG_SP; /* in the caller */
                        ainfo->regtype = RegTypeBase;
                        *stack_size += 8;
                } else {
-                       if ((*gr) & 1)
-                               (*gr) ++;
+                       /*if ((*gr) & 1)
+                               (*gr) ++;*/
                        ainfo->reg = *gr;
                }
                (*gr) ++;
@@ -510,7 +541,7 @@ mono_arch_allocate_vars (MonoCompile *m)
        MonoMethodHeader *header;
        MonoInst *inst;
        int i, offset, size, align, curinst;
-       int frame_reg = ARMREG_SP;
+       int frame_reg = ARMREG_FP;
 
        /* FIXME: this will change when we use FP as gcc does */
        m->flags |= MONO_CFG_HAS_SPILLUP;
@@ -518,17 +549,6 @@ mono_arch_allocate_vars (MonoCompile *m)
        /* allow room for the vararg method args: void* and long/double */
        if (mono_jit_trace_calls != NULL && mono_trace_eval (m->method))
                m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
-       /* this is bug #60332: remove when #59509 is fixed, so no weird vararg 
-        * call convs needs to be handled this way.
-        */
-       if (m->flags & MONO_CFG_HAS_VARARGS)
-               m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
-       /* gtk-sharp and other broken code will dllimport vararg functions even with
-        * non-varargs signatures. Since there is little hope people will get this right
-        * we assume they won't.
-        */
-       if (m->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE)
-               m->param_area = MAX (m->param_area, sizeof (gpointer)*8);
 
        header = mono_method_get_header (m->method);
 
@@ -618,6 +638,11 @@ mono_arch_allocate_vars (MonoCompile *m)
                else
                        size = mono_type_size (inst->inst_vtype, &align);
 
+               /* FIXME: if a structure is misaligned, our memcpy doesn't work,
+                * since it loads/stores misaligned words, which don't do the right thing.
+                */
+               if (align < 4 && size >= 4)
+                       align = 4;
                offset += align - 1;
                offset &= ~(align - 1);
                inst->inst_offset = offset;
@@ -649,6 +674,11 @@ mono_arch_allocate_vars (MonoCompile *m)
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = frame_reg;
                        size = mono_type_size (sig->params [i], &align);
+                       /* FIXME: if a structure is misaligned, our memcpy doesn't work,
+                        * since it loads/stores misaligned words, which don't do the right thing.
+                        */
+                       if (align < 4 && size >= 4)
+                               align = 4;
                        offset += align - 1;
                        offset &= ~(align - 1);
                        inst->inst_offset = offset;
@@ -755,7 +785,7 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
                                arg->unused = ainfo->reg | (ainfo->size << 4) | (ainfo->vtsize << 8) | ((ainfo->offset / 4) << 20);
                        } else if (ainfo->regtype == RegTypeBase) {
                                arg->opcode = OP_OUTARG_MEMBASE;
-                               arg->unused = ainfo->offset;
+                               arg->unused = (ainfo->offset << 8) | ainfo->size;
                        } else if (ainfo->regtype == RegTypeFP) {
                                arg->unused = ainfo->reg;
                                /* FPA args are passed in int regs */
@@ -1187,6 +1217,40 @@ map_to_reg_reg_op (int op)
                return CEE_OR;
        case OP_XOR_IMM:
                return CEE_XOR;
+       case OP_LOAD_MEMBASE:
+               return OP_LOAD_MEMINDEX;
+       case OP_LOADI4_MEMBASE:
+               return OP_LOADI4_MEMINDEX;
+       case OP_LOADU4_MEMBASE:
+               return OP_LOADU4_MEMINDEX;
+       case OP_LOADU1_MEMBASE:
+               return OP_LOADU1_MEMINDEX;
+       case OP_LOADI2_MEMBASE:
+               return OP_LOADI2_MEMINDEX;
+       case OP_LOADU2_MEMBASE:
+               return OP_LOADU2_MEMINDEX;
+       case OP_LOADI1_MEMBASE:
+               return OP_LOADI1_MEMINDEX;
+       case OP_STOREI1_MEMBASE_REG:
+               return OP_STOREI1_MEMINDEX;
+       case OP_STOREI2_MEMBASE_REG:
+               return OP_STOREI2_MEMINDEX;
+       case OP_STOREI4_MEMBASE_REG:
+               return OP_STOREI4_MEMINDEX;
+       case OP_STORE_MEMBASE_REG:
+               return OP_STORE_MEMINDEX;
+       case OP_STORER4_MEMBASE_REG:
+               return OP_STORER4_MEMINDEX;
+       case OP_STORER8_MEMBASE_REG:
+               return OP_STORER8_MEMINDEX;
+       case OP_STORE_MEMBASE_IMM:
+               return OP_STORE_MEMBASE_REG;
+       case OP_STOREI1_MEMBASE_IMM:
+               return OP_STOREI1_MEMBASE_REG;
+       case OP_STOREI2_MEMBASE_IMM:
+               return OP_STOREI2_MEMBASE_REG;
+       case OP_STOREI4_MEMBASE_IMM:
+               return OP_STOREI4_MEMBASE_REG;
        }
        g_assert_not_reached ();
 }
@@ -1200,7 +1264,7 @@ static void
 mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 {
        MonoInst *ins, *next, *temp, *last_ins = NULL;
-       int rot_amount, imm8;
+       int rot_amount, imm8, low_imm;
 
        /* setup the virtual reg allocator */
        if (bb->max_ireg > cfg->rs->next_vireg)
@@ -1208,6 +1272,7 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 
        ins = bb->code;
        while (ins) {
+loop_start:
                switch (ins->opcode) {
                case OP_ADD_IMM:
                case OP_SUB_IMM:
@@ -1228,12 +1293,121 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                case OP_MUL_IMM:
+                       if (ins->inst_imm == 1) {
+                               ins->opcode = OP_MOVE;
+                               break;
+                       }
+                       if (ins->inst_imm == 0) {
+                               ins->opcode = OP_ICONST;
+                               ins->inst_c0 = 0;
+                               break;
+                       }
+                       imm8 = mono_is_power_of_two (ins->inst_imm);
+                       if (imm8 > 0) {
+                               ins->opcode = OP_SHL_IMM;
+                               ins->inst_imm = imm8;
+                               break;
+                       }
                        NEW_INS (cfg, temp, OP_ICONST);
                        temp->inst_c0 = ins->inst_imm;
                        temp->dreg = mono_regstate_next_int (cfg->rs);
                        ins->sreg2 = temp->dreg;
                        ins->opcode = CEE_MUL;
                        break;
+               case OP_LOAD_MEMBASE:
+               case OP_LOADI4_MEMBASE:
+               case OP_LOADU4_MEMBASE:
+               case OP_LOADU1_MEMBASE:
+                       /* we can do two things: load the immed in a register
+                        * and use an indexed load, or see if the immed can be
+                        * represented as an ad_imm + a load with a smaller offset
+                        * that fits. We just do the first for now, optimize later.
+                        */
+                       if (arm_is_imm12 (ins->inst_offset))
+                               break;
+                       NEW_INS (cfg, temp, OP_ICONST);
+                       temp->inst_c0 = ins->inst_offset;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       ins->sreg2 = temp->dreg;
+                       ins->opcode = map_to_reg_reg_op (ins->opcode);
+                       break;
+               case OP_LOADI2_MEMBASE:
+               case OP_LOADU2_MEMBASE:
+               case OP_LOADI1_MEMBASE:
+                       if (arm_is_imm8 (ins->inst_offset))
+                               break;
+                       NEW_INS (cfg, temp, OP_ICONST);
+                       temp->inst_c0 = ins->inst_offset;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       ins->sreg2 = temp->dreg;
+                       ins->opcode = map_to_reg_reg_op (ins->opcode);
+                       break;
+               case OP_LOADR4_MEMBASE:
+               case OP_LOADR8_MEMBASE:
+                       if (arm_is_fpimm8 (ins->inst_offset))
+                               break;
+                       low_imm = ins->inst_offset & 0x1ff;
+                       if ((imm8 = mono_arm_is_rotated_imm8 (ins->inst_offset & ~0x1ff, &rot_amount)) >= 0) {
+                               NEW_INS (cfg, temp, OP_ADD_IMM);
+                               temp->inst_imm = ins->inst_offset & ~0x1ff;
+                               temp->sreg1 = ins->inst_basereg;
+                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                               ins->inst_basereg = temp->dreg;
+                               ins->inst_offset = low_imm;
+                               break;
+                       }
+                       /* FPA doesn't have indexed load instructions */
+                       g_assert_not_reached ();
+                       break;
+               case OP_STORE_MEMBASE_REG:
+               case OP_STOREI4_MEMBASE_REG:
+               case OP_STOREI1_MEMBASE_REG:
+                       if (arm_is_imm12 (ins->inst_offset))
+                               break;
+                       NEW_INS (cfg, temp, OP_ICONST);
+                       temp->inst_c0 = ins->inst_offset;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       ins->sreg2 = temp->dreg;
+                       ins->opcode = map_to_reg_reg_op (ins->opcode);
+                       break;
+               case OP_STOREI2_MEMBASE_REG:
+                       if (arm_is_imm8 (ins->inst_offset))
+                               break;
+                       NEW_INS (cfg, temp, OP_ICONST);
+                       temp->inst_c0 = ins->inst_offset;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       ins->sreg2 = temp->dreg;
+                       ins->opcode = map_to_reg_reg_op (ins->opcode);
+                       break;
+               case OP_STORER4_MEMBASE_REG:
+               case OP_STORER8_MEMBASE_REG:
+                       if (arm_is_fpimm8 (ins->inst_offset))
+                               break;
+                       low_imm = ins->inst_offset & 0x1ff;
+                       if ((imm8 = mono_arm_is_rotated_imm8 (ins->inst_offset & ~ 0x1ff, &rot_amount)) >= 0 && arm_is_fpimm8 (low_imm)) {
+                               NEW_INS (cfg, temp, OP_ADD_IMM);
+                               temp->inst_imm = ins->inst_offset & ~0x1ff;
+                               temp->sreg1 = ins->inst_destbasereg;
+                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                               ins->inst_destbasereg = temp->dreg;
+                               ins->inst_offset = low_imm;
+                               break;
+                       }
+                       /*g_print ("fail with: %d (%d, %d)\n", ins->inst_offset, ins->inst_offset & ~0x1ff, low_imm);*/
+                       /* FPA doesn't have indexed store instructions */
+                       g_assert_not_reached ();
+                       break;
+               case OP_STORE_MEMBASE_IMM:
+               case OP_STOREI1_MEMBASE_IMM:
+               case OP_STOREI2_MEMBASE_IMM:
+               case OP_STOREI4_MEMBASE_IMM:
+                       NEW_INS (cfg, temp, OP_ICONST);
+                       temp->inst_c0 = ins->inst_imm;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+                       ins->sreg1 = temp->dreg;
+                       ins->opcode = map_to_reg_reg_op (ins->opcode);
+                       last_ins = temp;
+                       goto loop_start; /* make it handle the possibly big ins->inst_offset */
                }
                last_ins = ins;
                ins = ins->next;
@@ -1278,7 +1452,7 @@ emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size,
 
 typedef struct {
        guchar *code;
-       guchar *target;
+       const guchar *target; /* const to match the 'const guchar *target' parameter that handle_thunk and arm_patch take in this same change */
        int absolute;
        int found;
 } PatchData;
@@ -1339,7 +1513,7 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) {
 }
 
 static void
-handle_thunk (int absolute, guchar *code, guchar *target) {
+handle_thunk (int absolute, guchar *code, const guchar *target) {
        MonoDomain *domain = mono_domain_get ();
        PatchData pdata;
 
@@ -1364,7 +1538,7 @@ handle_thunk (int absolute, guchar *code, guchar *target) {
 }
 
 void
-arm_patch (guchar *code, guchar *target)
+arm_patch (guchar *code, const guchar *target)
 {
        guint32 ins = *(guint32*)code;
        guint32 prim = (ins >> 25) & 7;
@@ -1400,6 +1574,21 @@ arm_patch (guchar *code, guchar *target)
                /* branch and exchange: the address is constructed in a reg */
                g_assert_not_reached ();
        } else {
+               guint32 ccode [3];
+               guint32 *tmp = ccode;
+               ARM_LDR_IMM (tmp, ARMREG_IP, ARMREG_PC, 0);
+               ARM_MOV_REG_REG (tmp, ARMREG_LR, ARMREG_PC);
+               ARM_MOV_REG_REG (tmp, ARMREG_PC, ARMREG_IP);
+               if (ins == ccode [2]) {
+                       tmp = (guint32*)code;
+                       tmp [-1] = (guint32)target;
+                       return;
+               }
+               if (ins == ccode [0]) {
+                       tmp = (guint32*)code;
+                       tmp [2] = (guint32)target;
+                       return;
+               }
                g_assert_not_reached ();
        }
 //     g_print ("patched with 0x%08x\n", ins);
@@ -1433,6 +1622,14 @@ guint8*
 mono_arm_emit_load_imm (guint8 *code, int dreg, guint32 val)
 {
        int imm8, rot_amount;
+#if 0
+       ARM_LDR_IMM (code, dreg, ARMREG_PC, 0);
+       /* skip the constant pool */
+       ARM_B (code, 0);
+       *(int*)code = val;
+       code += 4;
+       return code;
+#endif
        if ((imm8 = mono_arm_is_rotated_imm8 (val, &rot_amount)) >= 0) {
                ARM_MOV_REG_IMM (code, dreg, imm8, rot_amount);
        } else if ((imm8 = mono_arm_is_rotated_imm8 (~val, &rot_amount)) >= 0) {
@@ -1554,8 +1751,24 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_STORE_MEMBASE_REG:
                case OP_STOREI4_MEMBASE_REG:
-                       g_assert (arm_is_imm12 (ins->inst_offset));
-                       ARM_STR_IMM (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
+                       /* this case is special, since it happens for spill code after lowering has been called */
+                       if (arm_is_imm12 (ins->inst_offset)) {
+                               ARM_STR_IMM (code, ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
+                       } else {
+                               code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
+                               ARM_STR_REG_REG (code, ins->sreg1, ins->inst_destbasereg, ARMREG_LR);
+                       }
+                       break;
+               case OP_STOREI1_MEMINDEX:
+                       ARM_STRB_REG_REG (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
+                       break;
+               case OP_STOREI2_MEMINDEX:
+                       /* note: the args are reversed in the macro */
+                       ARM_STRH_REG_REG (code, ins->inst_destbasereg, ins->sreg1, ins->sreg2);
+                       break;
+               case OP_STORE_MEMINDEX:
+               case OP_STOREI4_MEMINDEX:
+                       ARM_STR_REG_REG (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2);
                        break;
                case CEE_LDIND_I:
                case CEE_LDIND_I4:
@@ -1565,11 +1778,36 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_LOADU4_MEM:
                        g_assert_not_reached ();
                        break;
+               case OP_LOAD_MEMINDEX:
+               case OP_LOADI4_MEMINDEX:
+               case OP_LOADU4_MEMINDEX:
+                       ARM_LDR_REG_REG (code, ins->dreg, ins->inst_basereg, ins->sreg2);
+                       break;
+               case OP_LOADI1_MEMINDEX:
+                       /* note: the args are reversed in the macro */
+                       ARM_LDRSB_REG_REG (code, ins->inst_basereg, ins->dreg, ins->sreg2);
+                       break;
+               case OP_LOADU1_MEMINDEX:
+                       ARM_LDRB_REG_REG (code, ins->dreg, ins->inst_basereg, ins->sreg2);
+                       break;
+               case OP_LOADI2_MEMINDEX:
+                       /* note: the args are reversed in the macro */
+                       ARM_LDRSH_REG_REG (code, ins->inst_basereg, ins->dreg, ins->sreg2);
+                       break;
+               case OP_LOADU2_MEMINDEX:
+                       /* note: the args are reversed in the macro */
+                       ARM_LDRH_REG_REG (code, ins->inst_basereg, ins->dreg, ins->sreg2);
+                       break;
                case OP_LOAD_MEMBASE:
                case OP_LOADI4_MEMBASE:
                case OP_LOADU4_MEMBASE:
-                       g_assert (arm_is_imm12 (ins->inst_offset));
-                       ARM_LDR_IMM (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+                       /* this case is special, since it happens for spill code after lowering has been called */
+                       if (arm_is_imm12 (ins->inst_offset)) {
+                               ARM_LDR_IMM (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+                       } else {
+                               code = mono_arm_emit_load_imm (code, ARMREG_LR, ins->inst_offset);
+                               ARM_LDR_REG_REG (code, ins->dreg, ins->inst_basereg, ARMREG_LR);
+                       }
                        break;
                case OP_LOADI1_MEMBASE:
                        g_assert (arm_is_imm8 (ins->inst_offset));
@@ -1745,16 +1983,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ARM_SHL_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_SHL_IMM:
-                       ARM_SHL_IMM (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
+                       if (ins->inst_imm)
+                               ARM_SHL_IMM (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
                        break;
                case CEE_SHR:
                        ARM_SAR_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
                        break;
                case OP_SHR_IMM:
-                       ARM_SAR_IMM (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
+                       if (ins->inst_imm)
+                               ARM_SAR_IMM (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
                        break;
                case OP_SHR_UN_IMM:
-                       ARM_SHR_IMM (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
+                       if (ins->inst_imm)
+                               ARM_SHR_IMM (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f));
                        break;
                case CEE_SHR_UN:
                        ARM_SHR_REG (code, ins->dreg, ins->sreg1, ins->sreg2);
@@ -1794,18 +2035,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case CEE_CONV_U4:
                case OP_MOVE:
                case OP_SETREG:
-                       ARM_MOV_REG_REG (code, ins->dreg, ins->sreg1);
+                       if (ins->dreg != ins->sreg1)
+                               ARM_MOV_REG_REG (code, ins->dreg, ins->sreg1);
                        break;
                case OP_SETLRET: {
                        int saved = ins->sreg2;
-                       if (ins->sreg2 == ARMREG_R0) {
+                       if (ins->sreg2 == ARM_LSW_REG) {
                                ARM_MOV_REG_REG (code, ARMREG_LR, ins->sreg2);
                                saved = ARMREG_LR;
                        }
-                       if (ins->sreg1 != ARMREG_R0)
-                               ARM_MOV_REG_REG (code, ARMREG_R0, ins->sreg1);
-                       if (saved != ARMREG_R1)
-                               ARM_MOV_REG_REG (code, ARMREG_R1, saved);
+                       if (ins->sreg1 != ARM_LSW_REG)
+                               ARM_MOV_REG_REG (code, ARM_LSW_REG, ins->sreg1);
+                       if (saved != ARM_MSW_REG)
+                               ARM_MOV_REG_REG (code, ARM_MSW_REG, saved);
                        break;
                }
                case OP_SETFREG:
@@ -1815,50 +2057,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_FCONV_TO_R4:
                        ARM_MVFS (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_JMP: {
-#if ARM_PORT
-                       int i, pos = 0;
-                       
+               case CEE_JMP:
                        /*
                         * Keep in sync with mono_arch_emit_epilog
                         */
                        g_assert (!cfg->method->save_lmf);
-                       if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
-                               if (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET)) {
-                                       ppc_lwz (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, cfg->frame_reg);
-                               } else {
-                                       ppc_load (code, ppc_r11, cfg->stack_usage + PPC_RET_ADDR_OFFSET);
-                                       ppc_lwzx (code, ppc_r0, cfg->frame_reg, ppc_r11);
-                               }
-                               ppc_mtlr (code, ppc_r0);
-                       }
-                       if (ppc_is_imm16 (cfg->stack_usage)) {
-                               ppc_addic (code, ppc_sp, cfg->frame_reg, cfg->stack_usage);
-                       } else {
-                               ppc_load (code, ppc_r11, cfg->stack_usage);
-                               ppc_add (code, ppc_sp, cfg->frame_reg, ppc_r11);
-                       }
-                       if (!cfg->method->save_lmf) {
-                               /*for (i = 31; i >= 14; --i) {
-                                       if (cfg->used_float_regs & (1 << i)) {
-                                               pos += sizeof (double);
-                                               ppc_lfd (code, i, -pos, cfg->frame_reg);
-                                       }
-                               }*/
-                               for (i = 31; i >= 13; --i) {
-                                       if (cfg->used_int_regs & (1 << i)) {
-                                               pos += sizeof (gulong);
-                                               ppc_lwz (code, i, -pos, cfg->frame_reg);
-                                       }
-                               }
-                       } else {
-                               /* FIXME restore from MonoLMF: though this can't happen yet */
-                       }
+                       code = emit_big_add (code, ARMREG_SP, cfg->frame_reg, cfg->stack_usage);
+                       ARM_POP_NWB (code, cfg->used_int_regs | ((1 << ARMREG_SP)) | ((1 << ARMREG_LR)));
                        mono_add_patch_info (cfg, (guint8*) code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
-                       ppc_b (code, 0);
-#endif
+                       ARM_B (code, 0);
                        break;
-               }
                case OP_CHECK_THIS:
                        /* ensure ins->sreg1 is not NULL */
                        ARM_LDR_IMM (code, ARMREG_LR, ins->sreg1, 0);
@@ -1909,10 +2117,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_VCALL_MEMBASE:
                case OP_VOIDCALL_MEMBASE:
                case OP_CALL_MEMBASE:
-                       g_assert (ins->inst_offset >= 0 && ins->inst_offset < 4096);
-                       ARM_LDR_IMM (code, ARMREG_IP, ins->sreg1, ins->inst_offset);
+                       g_assert (arm_is_imm12 (ins->inst_offset));
+                       g_assert (ins->sreg1 != ARMREG_LR);
                        ARM_MOV_REG_REG (code, ARMREG_LR, ARMREG_PC);
-                       ARM_MOV_REG_REG (code, ARMREG_PC, ARMREG_IP);
+                       ARM_LDR_IMM (code, ARMREG_PC, ins->sreg1, ins->inst_offset);
                        break;
                case OP_OUTARG:
                        g_assert_not_reached ();
@@ -1924,11 +2132,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        alloca_waste &= ~7;
                        /* round the size to 8 bytes */
                        ARM_ADD_REG_IMM8 (code, ins->dreg, ins->sreg1, 7);
-                       ARM_SHL_IMM (code, ins->dreg, ins->dreg, 3);
-                       ARM_SHR_IMM (code, ins->dreg, ins->dreg, 3);
+                       ARM_BIC_REG_IMM8 (code, ins->dreg, ins->sreg1, 7);
                        ARM_ADD_REG_IMM8 (code, ins->dreg, ins->dreg, alloca_waste);
                        ARM_SUB_REG_REG (code, ARMREG_SP, ARMREG_SP, ins->dreg);
-                       /* memzero the area */
+                       /* memzero the area: dreg holds the size, sp is the pointer */
                        if (ins->flags & MONO_INST_INIT) {
                                guint8 *start_loop, *branch_to_cond;
                                ARM_MOV_REG_IMM8 (code, ARMREG_LR, 0);
@@ -1946,6 +2153,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                }
                case CEE_RET:
+                       g_assert_not_reached ();
                        ARM_MOV_REG_REG (code, ARMREG_PC, ARMREG_LR);
                        break;
                case CEE_THROW: {
@@ -1983,19 +2191,33 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                }
                case OP_START_HANDLER:
-                       g_assert (arm_is_imm12 (ins->inst_left->inst_offset));
-                       ARM_STR_IMM (code, ARMREG_LR, ins->inst_left->inst_basereg, ins->inst_left->inst_offset);
+                       if (arm_is_imm12 (ins->inst_left->inst_offset)) {
+                               ARM_STR_IMM (code, ARMREG_LR, ins->inst_left->inst_basereg, ins->inst_left->inst_offset);
+                       } else {
+                               code = mono_arm_emit_load_imm (code, ARMREG_IP, ins->inst_left->inst_offset);
+                               ARM_STR_REG_REG (code, ARMREG_LR, ins->inst_left->inst_basereg, ARMREG_IP);
+                       }
                        break;
                case OP_ENDFILTER:
                        if (ins->sreg1 != ARMREG_R0)
                                ARM_MOV_REG_REG (code, ARMREG_R0, ins->sreg1);
-                       g_assert (arm_is_imm12 (ins->inst_left->inst_offset));
-                       ARM_LDR_IMM (code, ARMREG_IP, ins->inst_left->inst_basereg, ins->inst_left->inst_offset);
+                       if (arm_is_imm12 (ins->inst_left->inst_offset)) {
+                               ARM_LDR_IMM (code, ARMREG_IP, ins->inst_left->inst_basereg, ins->inst_left->inst_offset);
+                       } else {
+                               g_assert (ARMREG_IP != ins->inst_left->inst_basereg);
+                               code = mono_arm_emit_load_imm (code, ARMREG_IP, ins->inst_left->inst_offset);
+                               ARM_LDR_REG_REG (code, ARMREG_IP, ins->inst_left->inst_basereg, ARMREG_IP);
+                       }
                        ARM_MOV_REG_REG (code, ARMREG_PC, ARMREG_IP);
                        break;
                case CEE_ENDFINALLY:
-                       g_assert (arm_is_imm12 (ins->inst_left->inst_offset));
-                       ARM_LDR_IMM (code, ARMREG_IP, ins->inst_left->inst_basereg, ins->inst_left->inst_offset);
+                       if (arm_is_imm12 (ins->inst_left->inst_offset)) {
+                               ARM_LDR_IMM (code, ARMREG_IP, ins->inst_left->inst_basereg, ins->inst_left->inst_offset);
+                       } else {
+                               g_assert (ARMREG_IP != ins->inst_left->inst_basereg);
+                               code = mono_arm_emit_load_imm (code, ARMREG_IP, ins->inst_left->inst_offset);
+                               ARM_LDR_REG_REG (code, ARMREG_IP, ins->inst_left->inst_basereg, ARMREG_IP);
+                       }
                        ARM_MOV_REG_REG (code, ARMREG_PC, ARMREG_IP);
                        break;
                case OP_CALL_HANDLER: 
@@ -2129,15 +2351,29 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ARM_LDFS (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
                        break;
                case CEE_CONV_R_UN: {
-                       /*static const guint64 adjust_val = 0x4330000000000000ULL;
-                       ppc_addis (code, ppc_r0, ppc_r0, 0x4330);
-                       ppc_stw (code, ppc_r0, -8, ppc_sp);
-                       ppc_stw (code, ins->sreg1, -4, ppc_sp);
-                       ppc_load (code, ppc_r11, &adjust_val);
-                       ppc_lfd (code, ins->dreg, -8, ppc_sp);
-                       ppc_lfd (code, ppc_f0, 0, ppc_r11);
-                       ppc_fsub (code, ins->dreg, ins->dreg, ppc_f0);*/
-                       g_assert_not_reached ();
+                       int tmpreg;
+                       tmpreg = ins->dreg == 0? 1: 0;
+                       ARM_CMP_REG_IMM8 (code, ins->sreg1, 0);
+                       ARM_FLTD (code, ins->dreg, ins->sreg1);
+                       ARM_B_COND (code, ARMCOND_GE, 8);
+                       /* save the temp register */
+                       ARM_SUB_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, 8);
+                       ARM_STFD (code, tmpreg, ARMREG_SP, 0);
+                       ARM_LDFD (code, tmpreg, ARMREG_PC, 12);
+                       ARM_FPA_ADFD (code, ins->dreg, ins->dreg, tmpreg);
+                       ARM_LDFD (code, tmpreg, ARMREG_SP, 0);
+                       ARM_ADD_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, 8);
+                       /* skip the constant pool */
+                       ARM_B (code, 8);
+                       code += 4;
+                       *(int*)code = 0x41f00000;
+                       code += 4;
+                       *(int*)code = 0;
+                       code += 4;
+                       /* FIXME: adjust:
+                        * ldfltd  ftemp, [pc, #8] 0x41f00000 0x00000000
+                        * adfltd  fdest, fdest, ftemp
+                        */
                        break;
                }
                case CEE_CONV_R4:
@@ -2304,7 +2540,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_FBGE_UN:
                        ARM_FCMP (code, ARM_FPA_CMF, ins->sreg1, ins->sreg2);
                        EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_VS); /* V set */
-                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_CS);
+                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_GE);
                        break;
                case OP_FBLE:
                        ARM_FCMP (code, ARM_FPA_CMF, ins->sreg2, ins->sreg1);
@@ -2313,7 +2549,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_FBLE_UN:
                        ARM_FCMP (code, ARM_FPA_CMF, ins->sreg2, ins->sreg1);
                        EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_VS); /* V set */
-                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_CS); /* swapped */
+                       EMIT_COND_BRANCH_FLAGS (ins, ARMCOND_GE); /* swapped */
                        break;
                case CEE_CKFINITE: {
                        /*ppc_stfd (code, ins->sreg1, -8, ppc_sp);
@@ -2384,6 +2620,7 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
 
                switch (patch_info->type) {
                case MONO_PATCH_INFO_IP:
+                       g_assert_not_reached ();
                        patch_lis_ori (ip, ip);
                        continue;
                case MONO_PATCH_INFO_METHOD_REL:
@@ -2400,6 +2637,7 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono
                case MONO_PATCH_INFO_LDSTR:
                case MONO_PATCH_INFO_TYPE_FROM_HANDLE:
                case MONO_PATCH_INFO_LDTOKEN:
+                       g_assert_not_reached ();
                        /* from OP_AOTCONST : lis + ori */
                        patch_lis_ori (ip, target);
                        continue;
@@ -2446,7 +2684,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        MonoBasicBlock *bb;
        MonoMethodSignature *sig;
        MonoInst *inst;
-       int alloc_size, pos, max_offset, i;
+       int alloc_size, pos, max_offset, i, rot_amount;
        guint8 *code;
        CallInfo *cinfo;
        int tracing = 0;
@@ -2460,35 +2698,23 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        cfg->code_size = 256 + sig->param_count * 20;
        code = cfg->native_code = g_malloc (cfg->code_size);
 
-       if (cfg->max_ireg >= 29)
-               cfg->used_int_regs |= USE_EXTRA_TEMPS;
        ARM_MOV_REG_REG (code, ARMREG_IP, ARMREG_SP);
-       ARM_PUSH (code, (cfg->used_int_regs | (1 << ARMREG_IP) | (1 << ARMREG_LR)));
-       prev_sp_offset = 8; /* ip and lr */
-       for (i = 0; i < 16; ++i) {
-               if (cfg->used_int_regs & (1 << i))
-                       prev_sp_offset += 4;
-       }
 
        alloc_size = cfg->stack_offset;
        pos = 0;
 
        if (!method->save_lmf) {
-               /*for (i = 31; i >= 14; --i) {
-                       if (cfg->used_float_regs & (1 << i)) {
-                               pos += sizeof (gdouble);
-                               ppc_stfd (code, i, -pos, ppc_sp);
-                       }
-               }*/
+               ARM_PUSH (code, (cfg->used_int_regs | (1 << ARMREG_IP) | (1 << ARMREG_LR)));
+               prev_sp_offset = 8; /* ip and lr */
+               for (i = 0; i < 16; ++i) {
+                       if (cfg->used_int_regs & (1 << i))
+                               prev_sp_offset += 4;
+               }
        } else {
-               /*int ofs;
-               pos += sizeof (MonoLMF);
+               ARM_PUSH (code, 0x5ff0);
+               prev_sp_offset = 4 * 10; /* all but r0-r3, sp and pc */
+               pos += sizeof (MonoLMF) - prev_sp_offset;
                lmf_offset = pos;
-               ofs = -pos + G_STRUCT_OFFSET(MonoLMF, iregs);
-               ppc_stmw (code, ppc_r13, ppc_r1, ofs);
-               for (i = 14; i < 32; i++) {
-                       ppc_stfd (code, i, (-pos + G_STRUCT_OFFSET(MonoLMF, fregs) + ((i-14) * sizeof (gdouble))), ppc_r1);
-               }*/
        }
        alloc_size += pos;
        // align to MONO_ARCH_FRAME_ALIGNMENT bytes
@@ -2497,19 +2723,26 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                alloc_size &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
        }
 
+       /* the stack used in the pushed regs */
+       if (prev_sp_offset & 4)
+               alloc_size += 4;
        cfg->stack_usage = alloc_size;
-       g_assert ((alloc_size & (MONO_ARCH_FRAME_ALIGNMENT-1)) == 0);
        if (alloc_size) {
-               g_assert (arm_is_imm8 (alloc_size));
-               ARM_SUB_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, alloc_size);
+               if ((i = mono_arm_is_rotated_imm8 (alloc_size, &rot_amount)) >= 0) {
+                       ARM_SUB_REG_IMM (code, ARMREG_SP, ARMREG_SP, i, rot_amount);
+               } else {
+                       code = mono_arm_emit_load_imm (code, ARMREG_IP, alloc_size);
+                       ARM_SUB_REG_REG (code, ARMREG_SP, ARMREG_SP, ARMREG_IP);
+               }
        }
        if (cfg->frame_reg != ARMREG_SP)
                ARM_MOV_REG_REG (code, cfg->frame_reg, ARMREG_SP);
+       //g_print ("prev_sp_offset: %d, alloc_size:%d\n", prev_sp_offset, alloc_size);
        prev_sp_offset += alloc_size;
 
         /* compute max_offset in order to use short forward jumps
-        * we always do it on ppc because the immediate displacement
-        * for jumps is too small 
+        * we could skip doing it on arm because the immediate displacement
+        * for jumps is large enough, but it may be useful later for constant pools
         */
        max_offset = 0;
        for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
@@ -2547,9 +2780,8 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                ARM_MOV_REG_REG (code, inst->dreg, ainfo->reg);
                        else if (ainfo->regtype == RegTypeFP) {
                                g_assert_not_reached ();
-                               //ppc_fmr (code, inst->dreg, ainfo->reg);
                        } else if (ainfo->regtype == RegTypeBase) {
-                               //g_assert (arm_is_imm12 (prev_sp_offset + ainfo->offset));
+                               g_assert (arm_is_imm12 (prev_sp_offset + ainfo->offset));
                                ARM_LDR_IMM (code, inst->dreg, ARMREG_SP, (prev_sp_offset + ainfo->offset));
                        } else
                                g_assert_not_reached ();
@@ -2561,8 +2793,12 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        if (ainfo->regtype == RegTypeGeneral) {
                                switch (ainfo->size) {
                                case 1:
-                                       g_assert (arm_is_imm12 (inst->inst_offset));
-                                       ARM_STRB_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
+                                       if (arm_is_imm12 (inst->inst_offset))
+                                               ARM_STRB_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
+                                       else {
+                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
+                                               ARM_STRB_REG_REG (code, ainfo->reg, inst->inst_basereg, ARMREG_IP);
+                                       }
                                        break;
                                case 2:
                                        g_assert (arm_is_imm8 (inst->inst_offset));
@@ -2575,59 +2811,61 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                        ARM_STR_IMM (code, ainfo->reg + 1, inst->inst_basereg, inst->inst_offset + 4);
                                        break;
                                default:
-                                       g_assert (arm_is_imm12 (inst->inst_offset));
-                                       ARM_STR_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
+                                       if (arm_is_imm12 (inst->inst_offset)) {
+                                               ARM_STR_IMM (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
+                                       } else {
+                                               code = mono_arm_emit_load_imm (code, ARMREG_IP, inst->inst_offset);
+                                               ARM_STR_REG_REG (code, ainfo->reg, inst->inst_basereg, ARMREG_IP);
+                                       }
                                        break;
                                }
                        } else if (ainfo->regtype == RegTypeBase) {
+                               g_assert (arm_is_imm12 (prev_sp_offset + ainfo->offset));
                                switch (ainfo->size) {
                                case 1:
                                        ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset));
+                                       g_assert (arm_is_imm12 (inst->inst_offset));
                                        ARM_STRB_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset);
                                        break;
                                case 2:
                                        ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset));
+                                       g_assert (arm_is_imm8 (inst->inst_offset));
                                        ARM_STRH_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset);
                                        break;
                                case 8:
+                                       g_assert (arm_is_imm12 (inst->inst_offset));
                                        ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset));
                                        ARM_STR_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset);
+                                       g_assert (arm_is_imm12 (prev_sp_offset + ainfo->offset + 4));
+                                       g_assert (arm_is_imm12 (inst->inst_offset + 4));
                                        ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset + 4));
                                        ARM_STR_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset + 4);
                                        break;
                                default:
+                                       g_assert (arm_is_imm12 (inst->inst_offset));
                                        ARM_LDR_IMM (code, ARMREG_LR, ARMREG_SP, (prev_sp_offset + ainfo->offset));
                                        ARM_STR_IMM (code, ARMREG_LR, inst->inst_basereg, inst->inst_offset);
                                        break;
                                }
                        } else if (ainfo->regtype == RegTypeFP) {
-                               /*g_assert (ppc_is_imm16 (inst->inst_offset));
-                               if (ainfo->size == 8)
-                                       ppc_stfd (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
-                               else if (ainfo->size == 4)
-                                       ppc_stfs (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
-                               else*/
-                                       g_assert_not_reached ();
+                               g_assert_not_reached ();
                        } else if (ainfo->regtype == RegTypeStructByVal) {
                                int doffset = inst->inst_offset;
                                int soffset = 0;
                                int cur_reg;
                                int size = 0;
-                               //g_assert (ppc_is_imm16 (inst->inst_offset));
-                               //g_assert (ppc_is_imm16 (inst->inst_offset + ainfo->size * sizeof (gpointer)));
                                if (mono_class_from_mono_type (inst->inst_vtype))
                                        size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), NULL);
                                for (cur_reg = 0; cur_reg < ainfo->size; ++cur_reg) {
+                                       g_assert (arm_is_imm12 (doffset));
                                        ARM_STR_IMM (code, ainfo->reg + cur_reg, inst->inst_basereg, doffset);
                                        soffset += sizeof (gpointer);
                                        doffset += sizeof (gpointer);
                                }
                                if (ainfo->vtsize) {
-                                       g_assert_not_reached ();
-                                       /* load the previous stack pointer in r11 (r0 gets overwritten by the memcpy) */
-                                       //ppc_lwz (code, ppc_r11, 0, ppc_sp);
                                        /* FIXME: handle overrun! with struct sizes not multiple of 4 */
-                                       //code = emit_memcpy (code, ainfo->vtsize * sizeof (gpointer), inst->inst_basereg, doffset, ppc_r11, ainfo->offset + soffset);
+                                       //g_print ("emit_memcpy (prev_sp_ofs: %d, ainfo->offset: %d, soffset: %d)\n", prev_sp_offset, ainfo->offset, soffset);
+                                       code = emit_memcpy (code, ainfo->vtsize * sizeof (gpointer), inst->inst_basereg, doffset, ARMREG_SP, prev_sp_offset + ainfo->offset);
                                }
                        } else if (ainfo->regtype == RegTypeStructByAddr) {
                                g_assert_not_reached ();
@@ -2653,31 +2891,29 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                } else {
                        ARM_BL (code, 0);
                }
-#if ARM_PORT
-               /* we build the MonoLMF structure on the stack - see mini-ppc.h */
+               /* we build the MonoLMF structure on the stack - see mini-arm.h */
                /* lmf_offset is the offset from the previous stack pointer,
                 * alloc_size is the total stack space allocated, so the offset
                 * of MonoLMF from the current stack ptr is alloc_size - lmf_offset.
-                * The pointer to the struct is put in ppc_r11 (new_lmf).
+                * The pointer to the struct is put in r1 (new_lmf).
+                * r2 is used as scratch
                 * The callee-saved registers are already in the MonoLMF structure
                 */
-               ppc_addi (code, ppc_r11, ppc_sp, alloc_size - lmf_offset);
-               /* ppc_r3 is the result from mono_get_lmf_addr () */
-               ppc_stw (code, ppc_r3, G_STRUCT_OFFSET(MonoLMF, lmf_addr), ppc_r11);
+               code = emit_big_add (code, ARMREG_R1, ARMREG_SP, alloc_size - lmf_offset);
+               /* r0 is the result from mono_get_lmf_addr () */
+               ARM_STR_IMM (code, ARMREG_R0, ARMREG_R1, G_STRUCT_OFFSET (MonoLMF, lmf_addr));
                /* new_lmf->previous_lmf = *lmf_addr */
-               ppc_lwz (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r3);
-               ppc_stw (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r11);
-               /* *(lmf_addr) = r11 */
-               ppc_stw (code, ppc_r11, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r3);
+               ARM_LDR_IMM (code, ARMREG_R2, ARMREG_R0, G_STRUCT_OFFSET (MonoLMF, previous_lmf));
+               ARM_STR_IMM (code, ARMREG_R2, ARMREG_R1, G_STRUCT_OFFSET (MonoLMF, previous_lmf));
+               /* *(lmf_addr) = r1 */
+               ARM_STR_IMM (code, ARMREG_R1, ARMREG_R0, G_STRUCT_OFFSET (MonoLMF, previous_lmf));
                /* save method info */
-               ppc_load (code, ppc_r0, method);
-               ppc_stw (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, method), ppc_r11);
-               ppc_stw (code, ppc_sp, G_STRUCT_OFFSET(MonoLMF, ebp), ppc_r11);
+               code = mono_arm_emit_load_imm (code, ARMREG_R2, method);
+               ARM_STR_IMM (code, ARMREG_R2, ARMREG_R1, G_STRUCT_OFFSET (MonoLMF, method));
+               ARM_STR_IMM (code, ARMREG_SP, ARMREG_R1, G_STRUCT_OFFSET (MonoLMF, ebp));
                /* save the current IP */
-               mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
-               ppc_load (code, ppc_r0, 0x01010101);
-               ppc_stw (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, eip), ppc_r11);
-#endif
+               ARM_MOV_REG_REG (code, ARMREG_R2, ARMREG_PC);
+               ARM_STR_IMM (code, ARMREG_R2, ARMREG_R1, G_STRUCT_OFFSET (MonoLMF, eip));
        }
 
        if (tracing)
@@ -2695,7 +2931,7 @@ mono_arch_emit_epilog (MonoCompile *cfg)
 {
        MonoJumpInfo *patch_info;
        MonoMethod *method = cfg->method;
-       int pos, i;
+       int pos, i, rot_amount;
        int max_epilog_size = 16 + 20*4;
        guint8 *code;
 
@@ -2725,41 +2961,32 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        pos = 0;
 
        if (method->save_lmf) {
-#if ARM_PORT
                int lmf_offset;
-               pos +=  sizeof (MonoLMF);
+               /* all but r0-r3, sp and pc */
+               pos += sizeof (MonoLMF) - (4 * 10);
                lmf_offset = pos;
-               /* save the frame reg in r8 */
-               ppc_mr (code, ppc_r8, cfg->frame_reg);
-               ppc_addi (code, ppc_r11, cfg->frame_reg, cfg->stack_usage - lmf_offset);
-               /* r5 = previous_lmf */
-               ppc_lwz (code, ppc_r5, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r11);
-               /* r6 = lmf_addr */
-               ppc_lwz (code, ppc_r6, G_STRUCT_OFFSET(MonoLMF, lmf_addr), ppc_r11);
+               /* r2 contains the pointer to the current LMF */
+               code = emit_big_add (code, ARMREG_R2, cfg->frame_reg, cfg->stack_usage - lmf_offset);
+               /* ip = previous_lmf */
+               ARM_LDR_IMM (code, ARMREG_IP, ARMREG_R2, G_STRUCT_OFFSET (MonoLMF, previous_lmf));
+               /* lr = lmf_addr */
+               ARM_LDR_IMM (code, ARMREG_LR, ARMREG_R2, G_STRUCT_OFFSET (MonoLMF, lmf_addr));
                /* *(lmf_addr) = previous_lmf */
-               ppc_stw (code, ppc_r5, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r6);
+               ARM_STR_IMM (code, ARMREG_IP, ARMREG_LR, G_STRUCT_OFFSET (MonoLMF, previous_lmf));
                /* FIXME: speedup: there is no actual need to restore the registers if
                 * we didn't actually change them (idea from Zoltan).
                 */
                /* restore iregs */
-               ppc_lmw (code, ppc_r13, ppc_r11, G_STRUCT_OFFSET(MonoLMF, iregs));
-               /* restore fregs */
-               /*for (i = 14; i < 32; i++) {
-                       ppc_lfd (code, i, G_STRUCT_OFFSET(MonoLMF, fregs) + ((i-14) * sizeof (gdouble)), ppc_r11);
-               }*/
-               g_assert (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET));
-               /* use the saved copy of the frame reg in r8 */
-               if (1 || cfg->flags & MONO_CFG_HAS_CALLS) {
-                       ppc_lwz (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, ppc_r8);
-                       ppc_mtlr (code, ppc_r0);
-               }
-               ppc_addic (code, ppc_sp, ppc_r8, cfg->stack_usage);
-#endif
-               ARM_ADD_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, cfg->stack_usage);
-               ARM_POP_NWB (code, cfg->used_int_regs | ((1 << ARMREG_SP) | (1 << ARMREG_PC)));
+               /* point sp at the registers to restore: 10 is 14 -4, because we skip r0-r3 */
+               ARM_ADD_REG_IMM8 (code, ARMREG_SP, ARMREG_R2, (sizeof (MonoLMF) - 10 * sizeof (gulong)));
+               ARM_POP_NWB (code, 0xaff0); /* restore ip to sp and lr to pc */
        } else {
-               //g_assert (arm_is_imm8 (cfg->stack_usage));
-               ARM_ADD_REG_IMM8 (code, ARMREG_SP, ARMREG_SP, cfg->stack_usage);
+               if ((i = mono_arm_is_rotated_imm8 (cfg->stack_usage, &rot_amount)) >= 0) {
+                       ARM_ADD_REG_IMM (code, ARMREG_SP, cfg->frame_reg, i, rot_amount);
+               } else {
+                       code = mono_arm_emit_load_imm (code, ARMREG_IP, cfg->stack_usage);
+                       ARM_ADD_REG_REG (code, ARMREG_SP, ARMREG_SP, ARMREG_IP);
+               }
                ARM_POP_NWB (code, cfg->used_int_regs | ((1 << ARMREG_SP) | (1 << ARMREG_PC)));
        }
 
@@ -2831,6 +3058,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
                switch (patch_info->type) {
                case MONO_PATCH_INFO_EXC: {
                        unsigned char *ip = patch_info->ip.i + cfg->native_code;
+                       const char *ex_name = patch_info->data.target;
                        i = exception_id_by_name (patch_info->data.target);
                        if (exc_throw_pos [i]) {
                                arm_patch (ip, exc_throw_pos [i]);
@@ -2840,6 +3068,8 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
                                exc_throw_pos [i] = code;
                        }
                        arm_patch (ip, code);
+                       //*(int*)code = 0xef9f0001;
+                       code += 4;
                        /*mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);*/
                        ARM_LDR_IMM (code, ARMREG_R0, ARMREG_PC, 0);
                        /* we got here from a conditional call, so the calling ip is set in lr already */
@@ -2847,7 +3077,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
                        patch_info->data.name = "mono_arch_throw_exception_by_name";
                        patch_info->ip.i = code - cfg->native_code;
                        ARM_B (code, 0);
-                       *(gpointer*)code = patch_info->data.target;
+                       *(gpointer*)code = ex_name;
                        code += 4;
                        break;
                }
@@ -2932,3 +3162,11 @@ mono_arch_flush_register_windows (void)
 {
 }
 
+void
+mono_arch_fixup_jinfo (MonoCompile *cfg)
+{
+       /* stack_usage must be a multiple of 4 no larger than 0xffff * 4 bytes: it is OR-ed into used_regs shifted left by 14, so stack_usage / 4 lands in bits 16-31 */
+       g_assert ((cfg->stack_usage & ~(0xffff << 2)) == 0);
+       cfg->jit_info->used_regs |= cfg->stack_usage << 14;
+}
+