2005-07-07 Atsushi Enomoto <atsushi@ximian.com>
[mono.git] / mono / mini / mini-ia64.c
index 87f57c44e07a62d3d389c7ab10c01c93c71bee6a..3fab8d82642e4de8c9c64be91e8889dc5d11d975 100644 (file)
@@ -264,7 +264,8 @@ get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
                        cinfo->ret.storage = ArgInFloatReg;
                        cinfo->ret.reg = 8;
                        break;
-               case MONO_TYPE_VALUETYPE: {
+               case MONO_TYPE_VALUETYPE:
+               case MONO_TYPE_TYPEDBYREF: {
                        guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
 
                        add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
@@ -275,12 +276,6 @@ get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
                                cinfo->ret.storage = ArgValuetypeAddrInIReg;
                        break;
                }
-               case MONO_TYPE_TYPEDBYREF:
-                       /* Same as a valuetype with size 24 */
-                       add_general (&gr, &stack_size, &cinfo->ret);
-                       if (cinfo->ret.storage == ArgInIReg)
-                               cinfo->ret.storage = ArgValuetypeAddrInIReg;
-                       break;
                case MONO_TYPE_VOID:
                        break;
                default:
@@ -503,6 +498,12 @@ mono_ia64_alloc_stacked_registers (MonoCompile *cfg)
        cfg->arch.reg_saved_b0 = cfg->arch.reg_local0 - 2;
        cfg->arch.reg_saved_sp = cfg->arch.reg_local0 - 3;
 
+       /* 
+        * Need to allocate at least 2 out registers for use by CEE_THROW / the system
+        * exception throwing code.
+        */
+       cfg->arch.n_out_regs = MAX (cfg->arch.n_out_regs, 2);
+
        g_free (cinfo);
 }
 
@@ -539,7 +540,7 @@ mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
 }
  
 void
-mono_arch_allocate_vars (MonoCompile *m)
+mono_arch_allocate_vars (MonoCompile *cfg)
 {
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
@@ -549,11 +550,11 @@ mono_arch_allocate_vars (MonoCompile *m)
        gint32 *offsets;
        CallInfo *cinfo;
 
-       mono_ia64_alloc_stacked_registers (m);
+       mono_ia64_alloc_stacked_registers (cfg);
 
-       header = mono_method_get_header (m->method);
+       header = mono_method_get_header (cfg->method);
 
-       sig = mono_method_signature (m->method);
+       sig = mono_method_signature (cfg->method);
 
        cinfo = get_call_info (sig, FALSE);
 
@@ -563,49 +564,48 @@ mono_arch_allocate_vars (MonoCompile *m)
         */
 
        /* Locals are allocated backwards from %fp */
-       m->frame_reg = m->arch.reg_saved_sp;
+       cfg->frame_reg = cfg->arch.reg_saved_sp;
        offset = 0;
 
-       if (m->method->save_lmf) {
+       if (cfg->method->save_lmf) {
                /* FIXME: */
 #if 0
                /* Reserve stack space for saving LMF + argument regs */
                offset += sizeof (MonoLMF);
-               m->arch.lmf_offset = offset;
+               cfg->arch.lmf_offset = offset;
 #endif
        }
 
        if (sig->ret->type != MONO_TYPE_VOID) {
                switch (cinfo->ret.storage) {
                case ArgInIReg:
-                       m->ret->opcode = OP_REGVAR;
-                       m->ret->inst_c0 = cinfo->ret.reg;
+                       cfg->ret->opcode = OP_REGVAR;
+                       cfg->ret->inst_c0 = cinfo->ret.reg;
                        break;
                case ArgInFloatReg:
-                       m->ret->opcode = OP_REGVAR;
-                       m->ret->inst_c0 = cinfo->ret.reg;
+                       cfg->ret->opcode = OP_REGVAR;
+                       cfg->ret->inst_c0 = cinfo->ret.reg;
                        break;
                case ArgValuetypeAddrInIReg:
-                       m->ret->opcode = OP_REGVAR;
-                       m->ret->inst_c0 = m->arch.reg_in0 + cinfo->ret.reg;
+                       cfg->ret->opcode = OP_REGVAR;
+                       cfg->ret->inst_c0 = cfg->arch.reg_in0 + cinfo->ret.reg;
                        break;
                default:
                        g_assert_not_reached ();
                }
-               m->ret->dreg = m->ret->inst_c0;
+               cfg->ret->dreg = cfg->ret->inst_c0;
        }
 
        /* Allocate locals */
-       offsets = mono_allocate_stack_slots (m, &locals_stack_size, &locals_stack_align);
+       offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
        if (locals_stack_align) {
-               offset += (locals_stack_align - 1);
-               offset &= ~(locals_stack_align - 1);
+               offset = ALIGN_TO (offset, locals_stack_align);
        }
-       for (i = m->locals_start; i < m->num_varinfo; i++) {
+       for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
                if (offsets [i] != -1) {
-                       MonoInst *inst = m->varinfo [i];
+                       MonoInst *inst = cfg->varinfo [i];
                        inst->opcode = OP_REGOFFSET;
-                       inst->inst_basereg = m->frame_reg;
+                       inst->inst_basereg = cfg->frame_reg;
                        inst->inst_offset = - (offset + offsets [i]);
                        // printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
                }
@@ -615,11 +615,11 @@ mono_arch_allocate_vars (MonoCompile *m)
 
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) {
                g_assert (cinfo->sig_cookie.storage == ArgOnStack);
-               m->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
+               cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
        }
 
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
-               inst = m->varinfo [i];
+               inst = cfg->varinfo [i];
                if (inst->opcode != OP_REGVAR) {
                        ArgInfo *ainfo = &cinfo->args [i];
                        gboolean inreg = TRUE;
@@ -630,6 +630,7 @@ mono_arch_allocate_vars (MonoCompile *m)
                        else
                                arg_type = sig->params [i - sig->hasthis];
 
+                       /* FIXME: VOLATILE is only set if the liveness pass runs */
                        if (inst->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
                                inreg = FALSE;
 
@@ -638,7 +639,7 @@ mono_arch_allocate_vars (MonoCompile *m)
                        switch (ainfo->storage) {
                        case ArgInIReg:
                                inst->opcode = OP_REGVAR;
-                               inst->dreg = m->arch.reg_in0 + ainfo->reg;
+                               inst->dreg = cfg->arch.reg_in0 + ainfo->reg;
                                break;
                        case ArgInFloatReg:
                                /* 
@@ -649,7 +650,7 @@ mono_arch_allocate_vars (MonoCompile *m)
                                break;
                        case ArgOnStack:
                                inst->opcode = OP_REGOFFSET;
-                               inst->inst_basereg = m->frame_reg;
+                               inst->inst_basereg = cfg->frame_reg;
                                inst->inst_offset = ARGS_OFFSET + ainfo->offset;
                                break;
                        case ArgValuetypeInReg:
@@ -660,7 +661,7 @@ mono_arch_allocate_vars (MonoCompile *m)
 
                        if (!inreg && (ainfo->storage != ArgOnStack)) {
                                inst->opcode = OP_REGOFFSET;
-                               inst->inst_basereg = m->frame_reg;
+                               inst->inst_basereg = cfg->frame_reg;
                                /* These arguments are saved to the stack in the prolog */
                                if (ainfo->storage == ArgValuetypeInReg) {
                                        NOT_IMPLEMENTED;
@@ -668,12 +669,13 @@ mono_arch_allocate_vars (MonoCompile *m)
                                }
                                else
                                        offset += sizeof (gpointer);
+                               offset = ALIGN_TO (offset, sizeof (gpointer));
                                inst->inst_offset = - offset;
                        }
                }
        }
 
-       m->stack_offset = offset;
+       cfg->stack_offset = offset;
 
        g_free (cinfo);
 }
@@ -738,8 +740,6 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        MonoMethodSignature *tmp_sig;
 
-                       NOT_IMPLEMENTED;
-
                        /* Emit the signature cookie just before the implicit arguments */
                        MonoInst *sig_arg;
                        /* FIXME: Add support for signature tokens to AOT */
@@ -763,6 +763,7 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
 
                        MONO_INST_NEW (cfg, arg, OP_OUTARG);
                        arg->inst_left = sig_arg;
+                       arg->inst_imm = 16 + cinfo->sig_cookie.offset;
                        arg->type = STACK_PTR;
 
                        /* prepend, so they get reversed */
@@ -804,6 +805,12 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call,
                                        size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
                                else
                                        size = mono_type_stack_size (&in->klass->byval_arg, &align);
+
+                               /* 
+                                * FIXME: The destination is 'size' long, but the source might
+                                * be smaller.
+                                */
+
                                if (ainfo->storage == ArgValuetypeInReg) {
                                        NOT_IMPLEMENTED;
                                }
@@ -998,14 +1005,12 @@ opcode_to_cond (int opcode)
 }
 
 static CompType
-opcode_to_type (int opcode)
+opcode_to_type (int opcode, int cmp_opcode)
 {
        if ((opcode >= CEE_BEQ) && (opcode <= CEE_BLT_UN))
                return CMP_TYPE_L;
        else if ((opcode >= OP_CEQ) && (opcode <= OP_CLT_UN))
                return CMP_TYPE_L;
-       else if ((opcode >= OP_COND_EXC_EQ) && (opcode <= OP_COND_EXC_LT_UN))
-               return CMP_TYPE_L;
        else if ((opcode >= OP_IBEQ) && (opcode <= OP_IBLE_UN))
                return CMP_TYPE_I;
        else if ((opcode >= OP_ICEQ) && (opcode <= OP_ICLT_UN))
@@ -1014,7 +1019,15 @@ opcode_to_type (int opcode)
                return CMP_TYPE_F;
        else if ((opcode >= OP_FCEQ) && (opcode <= OP_FCLT_UN))
                return CMP_TYPE_F;
-       else {
+       else if ((opcode >= OP_COND_EXC_EQ) && (opcode <= OP_COND_EXC_LT_UN)) {
+               switch (cmp_opcode) {
+               case OP_ICOMPARE:
+               case OP_ICOMPARE_IMM:
+                       return CMP_TYPE_I;
+               default:
+                       return CMP_TYPE_L;
+               }
+       } else {
                g_error ("Unknown opcode '%s' in opcode_to_type", mono_inst_name (opcode));
                return 0;
        }
@@ -1034,9 +1047,9 @@ int cond_to_ia64_cmp [][3] = {
 };
 
 static int
-opcode_to_ia64_cmp (int opcode)
+opcode_to_ia64_cmp (int opcode, int cmp_opcode)
 {
-       return cond_to_ia64_cmp [opcode_to_cond (opcode)][opcode_to_type (opcode)];
+       return cond_to_ia64_cmp [opcode_to_cond (opcode)][opcode_to_type (opcode, cmp_opcode)];
 }
 
 int cond_to_ia64_cmp_imm [][3] = {
@@ -1053,10 +1066,10 @@ int cond_to_ia64_cmp_imm [][3] = {
 };
 
 static int
-opcode_to_ia64_cmp_imm (int opcode)
+opcode_to_ia64_cmp_imm (int opcode, int cmp_opcode)
 {
        /* The condition needs to be reversed */
-       return cond_to_ia64_cmp_imm [opcode_to_cond (opcode)][opcode_to_type (opcode)];
+       return cond_to_ia64_cmp_imm [opcode_to_cond (opcode)][opcode_to_type (opcode, cmp_opcode)];
 }
 
 static void
@@ -1376,14 +1389,16 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        switch (next->opcode) {
                        case CEE_BGE:
                        case CEE_BLT:
-                       case CEE_BGE_UN:
-                       case CEE_BLT_UN:
                        case OP_COND_EXC_LT:
                        case OP_IBGE:
                        case OP_IBLT:
+                               imm = ia64_is_imm8 (ins->inst_imm - 1);
+                               break;
                        case OP_IBGE_UN:
                        case OP_IBLT_UN:
-                               imm = ia64_is_imm8 (ins->inst_imm - 1);
+                       case CEE_BGE_UN:
+                       case CEE_BLT_UN:
+                               imm = ia64_is_imm8 (ins->inst_imm - 1) && (ins->inst_imm > 0);
                                break;
                        default:
                                imm = ia64_is_imm8 (ins->inst_imm);
@@ -1391,11 +1406,11 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
 
                        if (imm) {
-                               ins->opcode = opcode_to_ia64_cmp_imm (next->opcode);
+                               ins->opcode = opcode_to_ia64_cmp_imm (next->opcode, ins->opcode);
                                ins->sreg2 = ins->sreg1;
                        }
                        else {
-                               ins->opcode = opcode_to_ia64_cmp (next->opcode);
+                               ins->opcode = opcode_to_ia64_cmp (next->opcode, ins->opcode);
 
                                if (ins->inst_imm == 0)
                                        ins->sreg2 = IA64_R0;
@@ -1468,7 +1483,7 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
 
                        next = ins->next;
 
-                       ins->opcode = opcode_to_ia64_cmp (next->opcode);
+                       ins->opcode = opcode_to_ia64_cmp (next->opcode, ins->opcode);
                        switch (next->opcode) {
                        case CEE_BEQ:
                        case CEE_BNE_UN:
@@ -1535,7 +1550,7 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                case OP_MUL_IMM: {
                        /* This should be emulated, but rules in inssel.brg generate it */
-                       int i;
+                       int i, sum_reg;
 
                        /* First the easy cases */
                        if (ins->inst_imm == 1) {
@@ -1549,12 +1564,86 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
                                        break;
                                }
 
+                       /* This could be optimized */
                        if (ins->opcode == OP_MUL_IMM) {
-                               /* FIXME: */
-                               g_error ("Multiplication by %ld not implemented\n", ins->inst_imm);
+                               sum_reg = 0;
+                               for (i = 0; i < 64; ++i) {
+                                       if (ins->inst_imm & (((gint64)1) << i)) {
+                                               NEW_INS (cfg, temp, OP_SHL_IMM);
+                                               temp->dreg = mono_regstate_next_int (cfg->rs);
+                                               temp->sreg1 = ins->sreg1;
+                                               temp->inst_imm = i;
+
+                                               if (sum_reg == 0)
+                                                       sum_reg = temp->dreg;
+                                               else {
+                                                       NEW_INS (cfg, temp2, CEE_ADD);
+                                                       temp2->dreg = mono_regstate_next_int (cfg->rs);
+                                                       temp2->sreg1 = sum_reg;
+                                                       temp2->sreg2 = temp->dreg;
+                                                       sum_reg = temp2->dreg;
+                                               }
+                                       }
+                               }
+                               ins->opcode = OP_MOVE;
+                               ins->sreg1 = sum_reg;
                        }
                        break;
                }
+               case CEE_CONV_OVF_U4:
+                       NEW_INS (cfg, temp, OP_IA64_CMP4_LT);
+                       temp->sreg1 = ins->sreg1;
+                       temp->sreg2 = IA64_R0;
+
+                       NEW_INS (cfg, temp, OP_IA64_COND_EXC);
+                       temp->inst_p1 = (char*)"OverflowException";
+
+                       ins->opcode = OP_MOVE;
+                       break;
+               case CEE_CONV_OVF_I4_UN:
+                       NEW_INS (cfg, temp, OP_ICONST);
+                       temp->inst_c0 = 0x7fffffff;
+                       temp->dreg = mono_regstate_next_int (cfg->rs);
+
+                       NEW_INS (cfg, temp2, OP_IA64_CMP4_GT_UN);
+                       temp2->sreg1 = ins->sreg1;
+                       temp2->sreg2 = temp->dreg;
+
+                       NEW_INS (cfg, temp, OP_IA64_COND_EXC);
+                       temp->inst_p1 = (char*)"OverflowException";
+
+                       ins->opcode = OP_MOVE;
+                       break;
+               case OP_FCONV_TO_I4:
+               case OP_FCONV_TO_I2:
+               case OP_FCONV_TO_U2:
+               case OP_FCONV_TO_I1:
+               case OP_FCONV_TO_U1:
+                       NEW_INS (cfg, temp, OP_FCONV_TO_I8);
+                       temp->sreg1 = ins->sreg1;
+                       temp->dreg = ins->dreg;
+
+                       switch (ins->opcode) {
+                       case OP_FCONV_TO_I4:
+                               ins->opcode = OP_SEXT_I4;
+                               break;
+                       case OP_FCONV_TO_I2:
+                               ins->opcode = OP_SEXT_I2;
+                               break;
+                       case OP_FCONV_TO_U2:
+                               ins->opcode = OP_ZEXT_I4;
+                               break;
+                       case OP_FCONV_TO_I1:
+                               ins->opcode = OP_SEXT_I1;
+                               break;
+                       case OP_FCONV_TO_U1:
+                               ins->opcode = OP_ZEXT_I1;
+                               break;
+                       default:
+                               g_assert_not_reached ();
+                       }
+                       ins->sreg1 = ins->dreg;
+                       break;
                default:
                        break;
                }
@@ -1578,6 +1667,96 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
        mono_local_regalloc (cfg, bb);
 }
 
+/*
+ * emit_load_volatile_arguments:
+ *
+ *  Restore the arguments of cfg->method to their incoming locations (input
+ * registers or stack slots). Required before a tail call.
+ */
+static Ia64CodegenState
+emit_load_volatile_arguments (MonoCompile *cfg, Ia64CodegenState code)
+{
+       MonoMethod *method = cfg->method;
+       MonoMethodSignature *sig;
+       MonoInst *ins;
+       CallInfo *cinfo;
+       guint32 i;
+
+       /* FIXME: Generate intermediate code instead */
+
+       sig = mono_method_signature (method);
+
+       cinfo = get_call_info (sig, FALSE);
+       
+       /* This is the opposite of the code in emit_prolog */
+       for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
+               ArgInfo *ainfo = cinfo->args + i;
+               gint32 stack_offset;
+               MonoType *arg_type;
+               ins = cfg->varinfo [i];
+
+               if (sig->hasthis && (i == 0))
+                       arg_type = &mono_defaults.object_class->byval_arg;
+               else
+                       arg_type = sig->params [i - sig->hasthis];
+
+               arg_type = mono_type_get_underlying_type (arg_type);
+
+               stack_offset = ainfo->offset + ARGS_OFFSET;
+
+               /* Reload volatile arguments from the stack into their argument registers */
+               if (ins->opcode != OP_REGVAR) {
+                       switch (ainfo->storage) {
+                       case ArgInIReg:
+                       case ArgInFloatReg:
+                               /* FIXME: big offsets (inst_offset is passed to adds directly) */
+                               g_assert (ins->opcode == OP_REGOFFSET);
+                               ia64_adds_imm (code, GP_SCRATCH_REG, ins->inst_offset, ins->inst_basereg);
+                               if (arg_type->byref)
+                                       ia64_ld8 (code, cfg->arch.reg_in0 + ainfo->reg, GP_SCRATCH_REG);
+                               else {
+                                       switch (arg_type->type) {
+                                       case MONO_TYPE_R4:
+                                               ia64_ldfs (code, ainfo->reg, GP_SCRATCH_REG);
+                                               break;
+                                       case MONO_TYPE_R8:
+                                               ia64_ldfd (code, ainfo->reg, GP_SCRATCH_REG);
+                                               break;
+                                       default:
+                                               ia64_ld8 (code, cfg->arch.reg_in0 + ainfo->reg, GP_SCRATCH_REG);
+                                               break;
+                                       }
+                               }
+                               break;
+                       case ArgOnStack:
+                               break;
+                       default:
+                               NOT_IMPLEMENTED;
+                       }
+               }
+
+               if (ins->opcode == OP_REGVAR) {
+                       /* Argument allocated to (non-volatile) register */
+                       switch (ainfo->storage) {
+                       case ArgInIReg:
+                               if (ins->dreg != cfg->arch.reg_in0 + ainfo->reg)
+                                       ia64_mov (code, cfg->arch.reg_in0 + ainfo->reg, ins->dreg);
+                               break;
+                       case ArgOnStack:
+                               ia64_adds_imm (code, GP_SCRATCH_REG, 16 + ainfo->offset, cfg->frame_reg);
+                               ia64_st8 (code, GP_SCRATCH_REG, ins->dreg);
+                               break;
+                       default:
+                               NOT_IMPLEMENTED;
+                       }
+               }
+       }
+
+       g_free (cinfo);
+
+       return code;
+}
+
 static Ia64CodegenState
 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, Ia64CodegenState code)
 {
@@ -1624,7 +1803,7 @@ emit_call (MonoCompile *cfg, Ia64CodegenState code, guint32 patch_type, gconstpo
 {
        mono_add_patch_info (cfg, code.buf - cfg->native_code, patch_type, data);
 
-       if (patch_type == MONO_PATCH_INFO_ABS) {
+       if ((patch_type == MONO_PATCH_INFO_ABS) || (patch_type == MONO_PATCH_INFO_INTERNAL_METHOD)) {
                /* Indirect call */
                ia64_movl (code, GP_SCRATCH_REG, 0);
                ia64_ld8_inc_imm (code, GP_SCRATCH_REG2, GP_SCRATCH_REG, 8);
@@ -1632,8 +1811,12 @@ emit_call (MonoCompile *cfg, Ia64CodegenState code, guint32 patch_type, gconstpo
                ia64_ld8 (code, IA64_GP, GP_SCRATCH_REG);
                ia64_br_call_reg (code, IA64_B0, IA64_B6);
        }
-       else
-               ia64_br_call (code, IA64_B0, 0);
+       else {
+               /* Can't use a direct call since the branch displacement field might be too small to reach the target */
+               ia64_movl (code, GP_SCRATCH_REG, 0);
+               ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG);
+               ia64_br_call_reg (code, IA64_B0, IA64_B6);
+       }
 
        return code;
 }
@@ -1676,9 +1859,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
        while (ins) {
                offset = code.buf - cfg->native_code;
 
-               max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
+               max_len = ((int)(((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN])) + 128;
 
-               if (offset > (cfg->code_size - max_len - 16)) {
+               while (offset + max_len + 16 > cfg->code_size) {
                        ia64_codegen_close (code);
 
                        offset = code.buf - cfg->native_code;
@@ -1733,6 +1916,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ia64_begin_bundle (code);
                        ins->inst_c0 = code.buf - cfg->native_code;
                        break;
+               case CEE_NOP:
+                       break;
                case OP_BR_REG:
                        ia64_mov_to_br (code, IA64_B6, ins->sreg1);
                        ia64_br_cond_reg (code, IA64_B6);
@@ -1796,9 +1981,57 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        /* (sreg2 <= 0) && (res > ins->sreg1) => signed overflow */
                        ia64_cmp4_lt_pred (code, 9, 6, 10, ins->sreg1, GP_SCRATCH_REG);
 
+                       /* res <u sreg1 => unsigned overflow */
+                       ia64_cmp4_ltu (code, 7, 10, GP_SCRATCH_REG, ins->sreg1);
+
+                       /* FIXME: Predicate this since this is a side effect */
                        ia64_mov (code, ins->dreg, GP_SCRATCH_REG);
+                       break;
+               case OP_ISUBCC:
+                       /* p6 / p7 are set if there is signed / unsigned overflow, respectively */
+                       
+                       /* Set p8-p9 == (sreg2 > 0) */
+                       ia64_cmp4_lt (code, 8, 9, IA64_R0, ins->sreg2);
+
+                       ia64_sub (code, GP_SCRATCH_REG, ins->sreg1, ins->sreg2);
+                       
+                       /* (sreg2 > 0) && (res > ins->sreg1) => signed overflow */
+                       ia64_cmp4_gt_pred (code, 8, 6, 10, GP_SCRATCH_REG, ins->sreg1);
+                       /* (sreg2 <= 0) && (res < ins->sreg1) => signed overflow */
+                       ia64_cmp4_lt_pred (code, 9, 6, 10, GP_SCRATCH_REG, ins->sreg1);
 
-                       /* FIXME: Set p7 as well */
+                       /* sreg1 <u sreg2 => unsigned overflow */
+                       ia64_cmp4_ltu (code, 7, 10, ins->sreg1, ins->sreg2);
+
+                       /* FIXME: Predicate this since this is a side effect */
+                       ia64_mov (code, ins->dreg, GP_SCRATCH_REG);
+                       break;
+               case OP_ADDCC:
+                       /* Same as OP_IADDCC */
+                       ia64_cmp_lt (code, 8, 9, IA64_R0, ins->sreg2);
+
+                       ia64_add (code, GP_SCRATCH_REG, ins->sreg1, ins->sreg2);
+                       
+                       ia64_cmp_lt_pred (code, 8, 6, 10, GP_SCRATCH_REG, ins->sreg1);
+                       ia64_cmp_lt_pred (code, 9, 6, 10, ins->sreg1, GP_SCRATCH_REG);
+
+                       ia64_cmp_ltu (code, 7, 10, GP_SCRATCH_REG, ins->sreg1);
+
+                       ia64_mov (code, ins->dreg, GP_SCRATCH_REG);
+                       break;
+               case OP_SUBCC:
+                       /* Same as OP_ISUBCC */
+
+                       ia64_cmp_lt (code, 8, 9, IA64_R0, ins->sreg2);
+
+                       ia64_sub (code, GP_SCRATCH_REG, ins->sreg1, ins->sreg2);
+                       
+                       ia64_cmp_gt_pred (code, 8, 6, 10, GP_SCRATCH_REG, ins->sreg1);
+                       ia64_cmp_lt_pred (code, 9, 6, 10, GP_SCRATCH_REG, ins->sreg1);
+
+                       ia64_cmp_ltu (code, 7, 10, ins->sreg1, ins->sreg2);
+
+                       ia64_mov (code, ins->dreg, GP_SCRATCH_REG);
                        break;
                case OP_ADD_IMM:
                case OP_IADD_IMM:
@@ -1877,6 +2110,18 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_SEXT_I2:
                        ia64_sxt2 (code, ins->dreg, ins->sreg1);
                        break;
+               case OP_SEXT_I4:
+                       ia64_sxt4 (code, ins->dreg, ins->sreg1);
+                       break;
+               case OP_ZEXT_I1:
+                       ia64_zxt1 (code, ins->dreg, ins->sreg1);
+                       break;
+               case OP_ZEXT_I2:
+                       ia64_zxt2 (code, ins->dreg, ins->sreg1);
+                       break;
+               case OP_ZEXT_I4:
+                       ia64_zxt4 (code, ins->dreg, ins->sreg1);
+                       break;
 
                        /* Compare opcodes */
                case OP_IA64_CMP4_EQ:
@@ -2035,16 +2280,21 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
 
                case OP_COND_EXC_IOV:
-                       /* FIXME: */
-                       ia64_break_i_pred (code, 6, 0);
+               case OP_COND_EXC_OV:
+                       mono_add_patch_info (cfg, code.buf - cfg->native_code,
+                                                                MONO_PATCH_INFO_EXC, "OverflowException");
+                       ia64_br_cond_pred (code, 6, 0);
                        break;
                case OP_COND_EXC_IC:
-                       /* FIXME: */
-                       ia64_break_i_pred (code, 7, 0);
+               case OP_COND_EXC_C:
+                       mono_add_patch_info (cfg, code.buf - cfg->native_code,
+                                                                MONO_PATCH_INFO_EXC, "OverflowException");
+                       ia64_br_cond_pred (code, 7, 0);
                        break;
                case OP_IA64_COND_EXC:
-                       /* FIXME: */
-                       ia64_break_i_pred (code, 6, 0);
+                       mono_add_patch_info (cfg, code.buf - cfg->native_code,
+                                                                MONO_PATCH_INFO_EXC, ins->inst_p1);
+                       ia64_br_cond_pred (code, 6, 0);
                        break;
                case OP_IA64_CSET:
                        /* FIXME: Do this with one instruction ? */
@@ -2071,6 +2321,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        /* FIXME: Is this needed */
                        ia64_zxt2 (code, ins->dreg, ins->sreg1);
                        break;
+               case CEE_CONV_U4:
+                       /* FIXME: Is this needed */
+                       ia64_zxt4 (code, ins->dreg, ins->sreg1);
+                       break;
                case CEE_CONV_I8:
                case CEE_CONV_I:
                        /* FIXME: Sign extend ? */
@@ -2080,10 +2334,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case CEE_CONV_U:
                        ia64_zxt4 (code, ins->dreg, ins->sreg1);
                        break;
-               case CEE_CONV_OVF_U4:
-                       /* FIXME: */
-                       ia64_mov (code, ins->dreg, ins->sreg1);
-                       break;
 
                        /*
                         * FLOAT OPCODES
@@ -2147,19 +2397,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        ia64_fcvt_xf (code, ins->dreg, ins->dreg);
                        ia64_fnorm_d_sf (code, ins->dreg, ins->dreg, 0);
                        break;
+               case OP_LCONV_TO_R4:
+                       /* FIXME: Difference with CEE_CONV_R4 ? */
+                       ia64_setf_sig (code, ins->dreg, ins->sreg1);
+                       ia64_fcvt_xf (code, ins->dreg, ins->dreg);
+                       ia64_fnorm_s_sf (code, ins->dreg, ins->dreg, 0);
+                       break;
                case OP_FCONV_TO_R4:
                        ia64_fnorm_s_sf (code, ins->dreg, ins->sreg1, 0);
                        break;
-               case OP_FCONV_TO_I4:
-               case OP_FCONV_TO_I2:
-               case OP_FCONV_TO_U2:
-               case OP_FCONV_TO_U1:
-                       /* FIXME: sign/zero extend ? */
-                       ia64_fcvt_fx_trunc_sf (code, FP_SCRATCH_REG, ins->sreg1, 0);
-                       ia64_getf_sig (code, ins->dreg, FP_SCRATCH_REG);
-                       break;
                case OP_FCONV_TO_I8:
-                       /* FIXME: Difference with OP_FCONV_TO_I4 ? */
                        ia64_fcvt_fx_trunc_sf (code, FP_SCRATCH_REG, ins->sreg1, 0);
                        ia64_getf_sig (code, ins->dreg, FP_SCRATCH_REG);
                        break;
@@ -2175,13 +2422,38 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_FNEG:
                        ia64_fmerge_ns (code, ins->dreg, ins->sreg1, ins->sreg1);
                        break;
+               case CEE_CKFINITE:
+                       /* Quiet NaN */
+                       ia64_fclass_m (code, 6, 7, ins->sreg1, 0x080);
+                       mono_add_patch_info (cfg, code.buf - cfg->native_code,
+                                                                MONO_PATCH_INFO_EXC, "ArithmeticException");
+                       ia64_br_cond_pred (code, 6, 0);
+                       /* Signaling NaN */
+                       ia64_fclass_m (code, 6, 7, ins->sreg1, 0x040);
+                       mono_add_patch_info (cfg, code.buf - cfg->native_code,
+                                                                MONO_PATCH_INFO_EXC, "ArithmeticException");
+                       ia64_br_cond_pred (code, 6, 0);
+                       /* Positive infinity */
+                       ia64_fclass_m (code, 6, 7, ins->sreg1, 0x021);
+                       mono_add_patch_info (cfg, code.buf - cfg->native_code,
+                                                                MONO_PATCH_INFO_EXC, "ArithmeticException");
+                       ia64_br_cond_pred (code, 6, 0);
+                       /* Negative infinity */
+                       ia64_fclass_m (code, 6, 7, ins->sreg1, 0x022);
+                       mono_add_patch_info (cfg, code.buf - cfg->native_code,
+                                                                MONO_PATCH_INFO_EXC, "ArithmeticException");
+                       ia64_br_cond_pred (code, 6, 0);
+                       break;
 
                /* Calls */
                case OP_CHECK_THIS:
                        /* ensure ins->sreg1 is not NULL */
                        ia64_ld8 (code, GP_SCRATCH_REG, ins->sreg1);
                        break;
-
+               case OP_ARGLIST:
+                       ia64_adds_imm (code, GP_SCRATCH_REG, cfg->sig_cookie, cfg->frame_reg);
+                       ia64_st8 (code, ins->sreg1, GP_SCRATCH_REG);
+                       break;
                case OP_FCALL:
                case OP_LCALL:
                case OP_VCALL:
@@ -2227,15 +2499,130 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
                        code = emit_move_return_value (cfg, ins, code);
                        break;
+               case CEE_JMP: {
+                       /*
+                        * Keep in sync with the code in emit_epilog.
+                        */
+
+                       if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
+                               NOT_IMPLEMENTED;
+
+                       g_assert (!cfg->method->save_lmf);
+
+                       /* Load arguments into their original registers */
+                       code = emit_load_volatile_arguments (cfg, code);
+
+                       if (cfg->arch.stack_alloc_size)
+                               ia64_mov (code, IA64_SP, cfg->arch.reg_saved_sp);
+                       ia64_mov_to_ar_i (code, IA64_PFS, cfg->arch.reg_saved_ar_pfs);
+                       ia64_mov_ret_to_br (code, IA64_B0, cfg->arch.reg_saved_b0);
+
+                       mono_add_patch_info (cfg, code.buf - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
+                       ia64_movl (code, GP_SCRATCH_REG, 0);
+                       ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG);
+                       ia64_br_cond_reg (code, IA64_B6);
+
+                       break;
+               }
+
+               case OP_LOCALLOC:
+                       /* keep alignment */
+                       ia64_adds_imm (code, GP_SCRATCH_REG, MONO_ARCH_FRAME_ALIGNMENT - 1, ins->sreg1);
+                       ia64_movl (code, GP_SCRATCH_REG2, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
+                       ia64_and (code, GP_SCRATCH_REG, GP_SCRATCH_REG, GP_SCRATCH_REG2);
+
+                       ia64_sub (code, IA64_SP, IA64_SP, GP_SCRATCH_REG);
+
+                       /* The first 16 bytes at sp are reserved by the ABI */
+                       ia64_adds_imm (code, ins->dreg, 16, IA64_SP);
+
+                       if (ins->flags & MONO_INST_INIT) {
+                               /* Upper limit */
+                               ia64_add (code, GP_SCRATCH_REG2, ins->dreg, GP_SCRATCH_REG);
+
+                               /* Init loop */
+                               ia64_st8_inc_imm_hint (code, ins->dreg, IA64_R0, 8, 0);
+                               ia64_cmp_lt (code, 8, 9, ins->dreg, GP_SCRATCH_REG2);
+                               ia64_br_cond_pred (code, 8, -2);
+
+                               ia64_sub (code, ins->dreg, GP_SCRATCH_REG2, GP_SCRATCH_REG);
+                       }
+
+                       break;
 
                        /* Exception handling */
                case OP_CALL_HANDLER:
-                       /* FIXME: */
+                       /*
+                        * Using a call instruction would mess up the register stack, so
+                        * save the return address to a register and use a
+                        * branch.
+                        */
+                       ia64_mov (code, IA64_R15, IA64_R0);
+                       ia64_mov_from_ip (code, GP_SCRATCH_REG);
+                       /* Add the length of OP_CALL_HANDLER */
+                       ia64_adds_imm (code, GP_SCRATCH_REG, 5 * 16, GP_SCRATCH_REG);
+                       mono_add_patch_info (cfg, code.buf - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
+                       ia64_movl (code, GP_SCRATCH_REG2, 0);
+                       ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG2);
+                       ia64_br_cond_reg (code, IA64_B6);
                        break;
+               case OP_START_HANDLER: {
+                       /*
+                        * We receive the return address in GP_SCRATCH_REG.
+                        */
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
 
+                       /* 
+                        * We might be called by the exception handling code, in which case
+                        * the register stack is not set up correctly. So do it now.
+                        */
+                       ia64_alloc (code, GP_SCRATCH_REG2, cfg->arch.reg_local0 - cfg->arch.reg_in0, cfg->arch.reg_out0 - cfg->arch.reg_local0, cfg->arch.n_out_regs, 0);
+
+                       /* Set the fp register from the value passed in by the caller */
+                       /* R15 is used since it is writable using libunwind */
+                       /* R15 == 0 means we are called by OP_CALL_HANDLER or via resume_context () */
+                       ia64_cmp_eq (code, 6, 7, IA64_R15, IA64_R0);
+                       ia64_add_pred (code, 7, cfg->frame_reg, IA64_R0, IA64_R15);
+
+                       ia64_adds_imm (code, GP_SCRATCH_REG2, spvar->inst_offset, cfg->frame_reg);
+                       ia64_st8_hint (code, GP_SCRATCH_REG2, GP_SCRATCH_REG, 0);
+
+                       break;
+               }
+               case CEE_ENDFINALLY: {
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+                       ia64_adds_imm (code, GP_SCRATCH_REG, spvar->inst_offset, cfg->frame_reg);
+                       ia64_ld8_hint (code, GP_SCRATCH_REG, GP_SCRATCH_REG, 0);
+                       ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG);
+                       ia64_br_cond_reg (code, IA64_B6);
+                       break;
+               }
+               case OP_ENDFILTER: {
+                       /* FIXME: Return the value */
+                       MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
+                       ia64_adds_imm (code, GP_SCRATCH_REG, spvar->inst_offset, cfg->frame_reg);
+                       ia64_ld8_hint (code, GP_SCRATCH_REG, GP_SCRATCH_REG, 0);
+                       ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG);
+                       ia64_br_cond_reg (code, IA64_B6);
+                       break;
+               }
                case CEE_THROW:
-                       /* FIXME: */
-                       ia64_break_i (code, 0x1234);
+                       ia64_mov (code, cfg->arch.reg_out0, ins->sreg1);
+                       code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
+                                                         (gpointer)"mono_arch_throw_exception");
+
+                       /* 
+                        * This might be the last instruction in the method, so add a dummy
+                        * instruction so the unwinder will work.
+                        */
+                       ia64_break_i (code, 0);
+                       break;
+               case OP_RETHROW:
+                       ia64_mov (code, cfg->arch.reg_out0, ins->sreg1);
+                       code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
+                                                         (gpointer)"mono_arch_rethrow_exception");
+
+                       ia64_break_i (code, 0);
                        break;
 
                default:
@@ -2302,6 +2689,111 @@ static Ia64InsType ins_types_in_template [32][3] = {
        {0, 0, 0}
 };
 
+static gboolean stops_in_template [32][3] = { /* indexed [template number][slot]; TRUE where a stop is accepted at that slot (consulted by ia64_emit_bundle) */
+       { FALSE, FALSE, FALSE }, /* 0x00 */
+       { FALSE, FALSE, TRUE }, /* 0x01 */
+       { FALSE, TRUE, FALSE }, /* 0x02 */
+       { FALSE, TRUE, TRUE }, /* 0x03 */
+       { FALSE, FALSE, FALSE }, /* 0x04 */
+       { FALSE, FALSE, TRUE }, /* 0x05 */
+       { FALSE, FALSE, FALSE }, /* 0x06 */
+       { FALSE, FALSE, FALSE }, /* 0x07 */
+
+       { FALSE, FALSE, FALSE }, /* 0x08 */
+       { FALSE, FALSE, TRUE }, /* 0x09 */
+       { TRUE, FALSE, FALSE }, /* 0x0a */
+       { TRUE, FALSE, TRUE }, /* 0x0b */
+       { FALSE, FALSE, FALSE }, /* 0x0c */
+       { FALSE, FALSE, TRUE }, /* 0x0d */
+       { FALSE, FALSE, FALSE }, /* 0x0e */
+       { FALSE, FALSE, TRUE }, /* 0x0f */
+
+       { FALSE, FALSE, FALSE }, /* 0x10 */
+       { FALSE, FALSE, TRUE }, /* 0x11 */
+       { FALSE, FALSE, FALSE }, /* 0x12 */
+       { FALSE, FALSE, TRUE }, /* 0x13 */
+       { FALSE, FALSE, FALSE }, /* 0x14 */
+       { FALSE, FALSE, FALSE }, /* 0x15 */
+       { FALSE, FALSE, FALSE }, /* 0x16 */
+       { FALSE, FALSE, TRUE }, /* 0x17 */
+
+       { FALSE, FALSE, FALSE }, /* 0x18 */
+       { FALSE, FALSE, TRUE }, /* 0x19 */
+       { FALSE, FALSE, FALSE }, /* 0x1a */
+       { FALSE, FALSE, FALSE }, /* 0x1b */
+       { FALSE, FALSE, FALSE }, /* 0x1c */
+       { FALSE, FALSE, TRUE }, /* 0x1d */
+       { FALSE, FALSE, FALSE }, /* 0x1e */
+       { FALSE, FALSE, FALSE } /* 0x1f */
+};
+
+void
+ia64_emit_bundle (Ia64CodegenState *code, gboolean flush)
+{
+       int i, j, ins_type, template;
+       /* Emit the instructions queued in CODE as bundle(s) and reset the queue; FLUSH is currently unused */
+       if (!code->automatic) { /* manual mode: the caller picked the template and filled the slots */
+               if (code->nins == 0)
+                       return;
+               /* A hand-built bundle must have all three slots filled */
+               g_assert (code->nins == 3);
+
+               /* Verify template is correct */
+               template = code->template;
+               for (j = 0; j < 3; ++j) {
+                       if (code->stops [j]) /* a requested stop must exist at this very slot of the template */
+                               g_assert (stops_in_template [template][j]);
+
+                       ins_type = ins_types_in_template [template][j];
+                       switch (code->itypes [j]) {
+                       case IA64_INS_TYPE_A: /* A-type instructions are accepted in either an I or an M slot */
+                               g_assert ((ins_type == IA64_INS_TYPE_I) || (ins_type == IA64_INS_TYPE_M));
+                               break;
+                       case IA64_INS_TYPE_LX:
+                               g_assert (j == 1);
+                               g_assert (ins_type == IA64_INS_TYPE_LX);
+                               j ++; /* LX starts at slot 1 and also covers slot 2, so skip it */
+                               break;
+                       default:
+                               g_assert (ins_type == code->itypes [j]);
+                       }
+               }
+
+               ia64_emit_bundle_template (code, template, code->instructions [0], code->instructions [1], code->instructions [2]);
+               code->template = 0;
+               code->nins = 0;
+               return;
+       }
+       /* Automatic mode: every queued instruction gets its own bundle, padded with nops */
+       for (i = 0; i < code->nins; ++i) {
+               switch (code->itypes [i]) {
+               case IA64_INS_TYPE_A:
+                       ia64_emit_bundle_template (code, IA64_TEMPLATE_MIIS, code->instructions [i], IA64_NOP_I, IA64_NOP_I);
+                       break;
+               case IA64_INS_TYPE_I:
+                       ia64_emit_bundle_template (code, IA64_TEMPLATE_MIIS, IA64_NOP_M, code->instructions [i], IA64_NOP_I);
+                       break;
+               case IA64_INS_TYPE_M:
+                       ia64_emit_bundle_template (code, IA64_TEMPLATE_MIIS, code->instructions [i], IA64_NOP_I, IA64_NOP_I);
+                       break;
+               case IA64_INS_TYPE_B:
+                       ia64_emit_bundle_template (code, IA64_TEMPLATE_MIBS, IA64_NOP_M, IA64_NOP_I, code->instructions [i]);
+                       break;
+               case IA64_INS_TYPE_F:
+                       ia64_emit_bundle_template (code, IA64_TEMPLATE_MFIS, IA64_NOP_M, code->instructions [i], IA64_NOP_I);
+                       break;
+               case IA64_INS_TYPE_LX:
+                       ia64_emit_bundle_template (code, IA64_TEMPLATE_MLXS, IA64_NOP_M, code->instructions [i], code->instructions [i + 1]);
+                       i ++; /* an LX instruction occupies two queue entries */
+                       break;
+               default:
+                       g_assert_not_reached ();
+               }
+       }
+
+       code->nins = 0;
+}
+
 static void 
 ia64_patch (unsigned char* code, gpointer target)
 {
@@ -2418,12 +2910,13 @@ guint8 *
 mono_arch_emit_prolog (MonoCompile *cfg)
 {
        MonoMethod *method = cfg->method;
-       MonoBasicBlock *bb;
        MonoMethodSignature *sig;
        MonoInst *inst;
-       int alloc_size, pos, max_offset, i;
+       int alloc_size, pos, i;
        Ia64CodegenState code;
        CallInfo *cinfo;
+       unw_dyn_region_info_t *r_pro;
+       int unw_op_count;
 
        sig = mono_method_signature (method);
        pos = 0;
@@ -2434,9 +2927,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        cfg->native_code = g_malloc (cfg->code_size);
 
        ia64_codegen_init (code, cfg->native_code);
-
-       ia64_alloc (code, cfg->arch.reg_saved_ar_pfs, cfg->arch.reg_local0 - cfg->arch.reg_in0, cfg->arch.reg_out0 - cfg->arch.reg_local0, cfg->arch.n_out_regs, 0);
-       ia64_mov_from_br (code, cfg->arch.reg_saved_b0, IA64_B0);
+       ia64_codegen_set_automatic (code, FALSE);
 
        alloc_size = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT);
        if (cfg->param_area)
@@ -2446,6 +2937,10 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                alloc_size += 16;
        alloc_size = ALIGN_TO (alloc_size, MONO_ARCH_FRAME_ALIGNMENT);
 
+       if (cfg->flags & MONO_CFG_HAS_ALLOCA)
+               /* Force sp to be saved/restored */
+               alloc_size += MONO_ARCH_FRAME_ALIGNMENT;
+
        cfg->arch.stack_alloc_size = alloc_size;
 
        pos = 0;
@@ -2456,47 +2951,61 @@ mono_arch_emit_prolog (MonoCompile *cfg)
 
        alloc_size -= pos;
 
-       if (alloc_size || cinfo->stack_usage)
+       /* Initialize unwind info */
+       r_pro = g_malloc0 (_U_dyn_region_info_size (3));
+       unw_op_count = 0;
+
+       ia64_begin_bundle_template (code, IA64_TEMPLATE_MIIS);
+       ia64_alloc (code, cfg->arch.reg_saved_ar_pfs, cfg->arch.reg_local0 - cfg->arch.reg_in0, cfg->arch.reg_out0 - cfg->arch.reg_local0, cfg->arch.n_out_regs, 0);
+       ia64_mov_from_br (code, cfg->arch.reg_saved_b0, IA64_B0);
+
+       _U_dyn_op_save_reg (&r_pro->op[unw_op_count++], _U_QP_TRUE, /* when=*/ 0,
+                                               /* reg=*/ UNW_IA64_AR_PFS, /* dst=*/ UNW_IA64_GR + cfg->arch.reg_saved_ar_pfs);
+       _U_dyn_op_save_reg (&r_pro->op[unw_op_count++], _U_QP_TRUE, /* when=*/ 1,
+                                               /* reg=*/ UNW_IA64_RP, /* dst=*/ UNW_IA64_GR + cfg->arch.reg_saved_b0);
+
+       if (alloc_size || cinfo->stack_usage) {
                ia64_mov (code, cfg->frame_reg, IA64_SP);
+               _U_dyn_op_save_reg (&r_pro->op[unw_op_count++], _U_QP_TRUE, /* when=*/ 2,
+                                                       /* reg=*/ UNW_IA64_SP, /* dst=*/ UNW_IA64_GR + cfg->frame_reg);
+       }
+       else
+               ia64_nop_i (code, 0);
+       ia64_stop (code);
+       ia64_end_bundle (code);
+
+       /* Finish unwind info */
+       r_pro->op_count = unw_op_count;
+       r_pro->insn_count = (code.buf - cfg->native_code) >> 4;
+
+       cfg->arch.r_pro = r_pro;
 
        if (alloc_size) {
                /* See mono_emit_stack_alloc */
 #if defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
                NOT_IMPLEMENTED;
 #else
-               ia64_mov (code, cfg->arch.reg_saved_sp, IA64_SP);
 
-               if (ia64_is_imm14 (-alloc_size))
+               if (ia64_is_imm14 (-alloc_size)) {
+                       ia64_begin_bundle_template (code, IA64_TEMPLATE_MISI);
+                       ia64_nop_m (code, 0);
+                       ia64_mov (code, cfg->arch.reg_saved_sp, IA64_SP); ia64_stop (code);
                        ia64_adds_imm (code, IA64_SP, (-alloc_size), IA64_SP);
+                       ia64_end_bundle (code);
+               }
                else {
-                       ia64_movl (code, GP_SCRATCH_REG, -alloc_size);
+                       ia64_begin_bundle_template (code, IA64_TEMPLATE_MLXS);
+                       ia64_mov (code, cfg->arch.reg_saved_sp, IA64_SP);
+                       ia64_movl (code, GP_SCRATCH_REG, -alloc_size); ia64_stop (code);
+                       ia64_begin_bundle_template (code, IA64_TEMPLATE_MIIS);
                        ia64_add (code, IA64_SP, GP_SCRATCH_REG, IA64_SP);
+                       ia64_nop_i (code, 0);
+                       ia64_nop_i (code, 0); ia64_stop (code);
+                       ia64_end_bundle (code);
                }
 #endif
        }
-
-       /* compute max_offset in order to use short forward jumps */
-       max_offset = 0;
-       if (cfg->opt & MONO_OPT_BRANCH) {
-               for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
-                       MonoInst *ins = bb->code;
-                       bb->max_offset = max_offset;
-
-                       if (cfg->prof_options & MONO_PROFILE_COVERAGE)
-                               max_offset += 6;
-                       /* max alignment for loops */
-                       if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
-                               max_offset += LOOP_ALIGNMENT;
-
-                       while (ins) {
-                               if (ins->opcode == OP_LABEL)
-                                       ins->inst_c1 = max_offset;
-                               
-                               max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
-                               ins = ins->next;
-                       }
-               }
-       }
+       ia64_codegen_set_automatic (code, TRUE);
 
        if (sig->ret->type != MONO_TYPE_VOID) {
                if ((cinfo->ret.storage == ArgInIReg) && (cfg->ret->opcode != OP_REGVAR)) {
@@ -2559,6 +3068,10 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                                if (inst->dreg != cfg->arch.reg_in0 + ainfo->reg)
                                        ia64_mov (code, inst->dreg, cfg->arch.reg_in0 + ainfo->reg);
                                break;
+                       case ArgOnStack:
+                               ia64_adds_imm (code, GP_SCRATCH_REG, 16 + ainfo->offset, cfg->frame_reg);
+                               ia64_ld8 (code, inst->dreg, GP_SCRATCH_REG);
+                               break;
                        default:
                                NOT_IMPLEMENTED;
                        }
@@ -2580,6 +3093,8 @@ mono_arch_emit_prolog (MonoCompile *cfg)
 
        g_assert (cfg->code_len < cfg->code_size);
 
+       cfg->arch.prolog_end_offset = cfg->code_len;
+
        return code.buf;
 }
 
@@ -2593,12 +3108,16 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        guint8 *buf;
        CallInfo *cinfo;
 
+       cfg->arch.epilog_begin_offset = cfg->code_len;
+
        while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
                cfg->code_size *= 2;
                cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
                mono_jit_stats.code_reallocs++;
        }
 
+       /* FIXME: Emit unwind info */
+
        buf = cfg->native_code + cfg->code_len;
 
        if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
@@ -2620,13 +3139,25 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        }
        g_free (cinfo);
 
+       ia64_end_bundle (code);
+       ia64_codegen_set_automatic (code, FALSE);
+
+       ia64_begin_bundle_template (code, IA64_TEMPLATE_MIIS);
        if (cfg->arch.stack_alloc_size)
                ia64_mov (code, IA64_SP, cfg->arch.reg_saved_sp);
-
+       else
+               ia64_nop_m (code, 0);
        ia64_mov_to_ar_i (code, IA64_PFS, cfg->arch.reg_saved_ar_pfs);
-       ia64_mov_ret_to_br (code, IA64_B0, cfg->arch.reg_saved_b0);
+       ia64_mov_ret_to_br (code, IA64_B0, cfg->arch.reg_saved_b0); ia64_stop (code);
+       ia64_end_bundle (code);
+
+       ia64_begin_bundle_template (code, IA64_TEMPLATE_BBBS);
        ia64_br_ret_reg (code, IA64_B0);
+       ia64_nop_b (code, 0);
+       ia64_nop_b (code, 0); ia64_stop (code);
+       ia64_end_bundle (code);
 
+       ia64_codegen_set_automatic (code, TRUE);
        ia64_codegen_close (code);
 
        cfg->code_len = code.buf - cfg->native_code;
@@ -2640,6 +3171,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
        MonoJumpInfo *patch_info;
        int nthrows;
        Ia64CodegenState code;
+       gboolean empty = TRUE;
        /*
        MonoClass *exc_classes [16];
        guint8 *exc_throw_start [16], *exc_throw_end [16];
@@ -2649,27 +3181,69 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
        /* Compute needed space */
        for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
                if (patch_info->type == MONO_PATCH_INFO_EXC)
-                       code_size += 40;
+                       code_size += 256;
                if (patch_info->type == MONO_PATCH_INFO_R8)
                        code_size += 8 + 7; /* sizeof (double) + alignment */
                if (patch_info->type == MONO_PATCH_INFO_R4)
                        code_size += 4 + 7; /* sizeof (float) + alignment */
        }
 
+       while (cfg->code_len + code_size > (cfg->code_size - 16)) {
+               cfg->code_size *= 2;
+               cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
+               mono_jit_stats.code_reallocs++;
+       }
+
        ia64_codegen_init (code, cfg->native_code + cfg->code_len);
 
        /* add code to raise exceptions */
+       /* FIXME: Optimize this */
        nthrows = 0;
        for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
                switch (patch_info->type) {
                case MONO_PATCH_INFO_EXC: {
-                       NOT_IMPLEMENTED;
-               default:
+                       MonoClass *exc_class;
+                       guint8* throw_ip;
+                       guint8* buf;
+
+                       exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
+                       g_assert (exc_class);
+                       throw_ip = cfg->native_code + patch_info->ip.i;
+
+                       ia64_patch (cfg->native_code + patch_info->ip.i, code.buf);
+
+                       ia64_movl (code, cfg->arch.reg_out0 + 0, exc_class->type_token);
+
+                       ia64_begin_bundle (code);
+
+                       patch_info->data.name = "mono_arch_throw_corlib_exception";
+                       patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
+                       patch_info->ip.i = code.buf - cfg->native_code;
+
+                       /* Indirect call */
+                       ia64_movl (code, GP_SCRATCH_REG, 0);
+                       ia64_ld8_inc_imm (code, GP_SCRATCH_REG2, GP_SCRATCH_REG, 8);
+                       ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG2);
+                       ia64_ld8 (code, IA64_GP, GP_SCRATCH_REG);
+
+                       /* Compute the offset */
+                       buf = code.buf + 32;
+                       ia64_movl (code, cfg->arch.reg_out0 + 1, buf - throw_ip);
+
+                       ia64_br_call_reg (code, IA64_B0, IA64_B6);
+
+                       empty = FALSE;
                        break;
                }
+               default:
+                       break;
                }
        }
 
+       if (!empty)
+               /* The unwinder needs this to work */
+               ia64_break_i (code, 0);
+
        ia64_codegen_close (code);
 
        cfg->code_len = code.buf - cfg->native_code;
@@ -2693,6 +3267,24 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena
        return NULL;
 }
 
+void
+mono_arch_save_unwind_info (MonoCompile *cfg)
+{
+       unw_dyn_info_t *info = g_new0 (unw_dyn_info_t, 1);
+       unw_word_t code_start = (unw_word_t) cfg->native_code;
+
+       /* FIXME: Unregister this for dynamic methods */
+       /* Describe the compiled method (code range, name, prolog region) and register it for unwinding */
+       info->format = UNW_INFO_FORMAT_DYNAMIC;
+       info->gp = 0;
+       info->start_ip = code_start;
+       info->end_ip = code_start + cfg->code_len;
+       info->u.pi.regions = cfg->arch.r_pro;
+       info->u.pi.name_ptr = (unw_word_t)mono_method_full_name (cfg->method, TRUE);
+
+       _U_dyn_register (info);
+}
+
 void
 mono_arch_flush_icache (guint8 *code, gint size)
 {
@@ -2708,7 +3300,7 @@ mono_arch_flush_icache (guint8 *code, gint size)
 void
 mono_arch_flush_register_windows (void)
 {
-       NOT_IMPLEMENTED;
+       /* Not needed because of libunwind */
 }
 
 gboolean 
@@ -2726,8 +3318,7 @@ mono_arch_is_inst_imm (gint64 imm)
 gboolean
 mono_arch_is_int_overflow (void *sigctx, void *info)
 {
-       NOT_IMPLEMENTED;
-
+       /* Division is emulated with explicit overflow checks */
        return FALSE;
 }