Mon Oct 15 10:37:15 CEST 2007 Paolo Molaro <lupus@ximian.com>
[mono.git] / mono / mini / mini-alpha.c
index 7e5388c1f4dda97ea0a7acd94a4a422f4ba74f62..3707331f9ecdda6a75d2e4e23c62fd49194a3051 100644 (file)
    insert_after_ins (bb, last_ins, (dest)); \
 } while (0)
 
+#define NEW_ICONST(cfg,dest,val) do {                                  \
+    (dest) = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoInst));  \
+    (dest)->opcode = OP_ICONST;                                                \
+    (dest)->inst_c0 = (val);                                           \
+    (dest)->type = STACK_I4;                                           \
+  } while (0)
+
 
 #undef DEBUG
 #define DEBUG(a) if (cfg->verbose_level > 1) a
@@ -75,8 +82,7 @@
 static int indent_level = 0;
 
 int mini_alpha_verbose_level = 0;
-
-static const char*const * ins_spec = alpha_desc;
+static int bwx_supported = 0;
 
 static gboolean tls_offset_inited = FALSE;
 
@@ -95,24 +101,23 @@ gpointer mono_arch_get_lmf_addr (void);
 
 typedef enum {
         ArgInIReg,
-        ArgInFloatSSEReg,
-        ArgInDoubleSSEReg,
+        ArgInFloatReg,
+        ArgInDoubleReg,
         ArgOnStack,
-        ArgValuetypeInReg,
-//        ArgOnFloatFpStack,
-//        ArgOnDoubleFpStack,
+       ArgValuetypeInReg, // ??
+       ArgAggregate,
         ArgNone
 } ArgStorage;
 
 
 typedef struct {
-   gint16 offset;
-   gint8  reg;
-   ArgStorage storage;
+  gint16 offset;
+  gint8  reg;
+  ArgStorage storage;
 
-   /* Only if storage == ArgValuetypeInReg */
-   ArgStorage pair_storage [2];
-   gint8 pair_regs [2];
+  /* Only if storage == ArgAggregate */
+  int nregs, nslots;
+  //AggregateType atype; // So far use only AggregateNormal
 } ArgInfo;
 
 typedef struct {
@@ -129,7 +134,7 @@ typedef struct {
    ArgInfo args [1];
 } CallInfo;
 
-static CallInfo* get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke);
+static CallInfo* get_call_info (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, gboolean is_pinvoke);
 static unsigned int *emit_call(MonoCompile *cfg, unsigned int *code,
                               guint32 patch_type, gconstpointer data);
 
@@ -180,24 +185,25 @@ add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo,
    {
      /* A double register */
      if (is_double)
-       ainfo->storage = ArgInDoubleSSEReg;
+       ainfo->storage = ArgInDoubleReg;
      else
-       ainfo->storage = ArgInFloatSSEReg;
+       ainfo->storage = ArgInFloatReg;
+
      ainfo->reg = fparam_regs [*gr];
      (*gr) += 1;
    }
 }
 
 static void
-add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
+add_valuetype (MonoGenericSharingContext *ctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
                gboolean is_return,
                guint32 *gr, guint32 *fr, guint32 *stack_size)
 {
-  guint32 size, i;
+  guint32 size;
   MonoClass *klass;
   MonoMarshalType *info;
-  gboolean is_hfa = TRUE;
-  guint32 hfa_type = 0;
+  //gboolean is_hfa = TRUE;
+  //guint32 hfa_type = 0;
 
   klass = mono_class_from_mono_type (type);
   if (type->type == MONO_TYPE_TYPEDBYREF)
@@ -205,9 +211,9 @@ add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
   else if (sig->pinvoke)
     size = mono_type_native_stack_size (&klass->byval_arg, NULL);
   else
-    size = mono_type_stack_size (&klass->byval_arg, NULL);
+    size = mini_type_stack_size (ctx, &klass->byval_arg, NULL); /* ctx: generic sharing context parameter of add_valuetype */
 
-  if (!sig->pinvoke || (size == 0)) {
+  if (!sig->pinvoke || (size == 0) || is_return) {
     /* Allways pass in memory */
     ainfo->offset = *stack_size;
     *stack_size += ALIGN_TO (size, 8);
@@ -216,18 +222,40 @@ add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
     return;
   }
 
-        info = mono_marshal_load_type_info (klass);
-        g_assert (info);
-        if (info->native_size/* > 16*/) {
-                ainfo->offset = *stack_size;
-                *stack_size += ALIGN_TO (info->native_size, 8);
-                ainfo->storage = ArgOnStack;
+  info = mono_marshal_load_type_info (klass);
+  g_assert (info);
 
-                return;
-        }
+  ainfo->storage = ArgAggregate;
+  //ainfo->atype = AggregateNormal;
 
+#if 0
+  /* This also handles returning of TypedByRef used by some icalls */
+  if (is_return) {
+    if (size <= 32) {
+      ainfo->reg = IA64_R8;
+      ainfo->nregs = (size + 7) / 8;
+      ainfo->nslots = ainfo->nregs;
+      return;
+    }
+    NOT_IMPLEMENTED;
+  }
+#endif
+
+  ainfo->reg =  param_regs [*gr];
+  ainfo->offset = *stack_size;
+  ainfo->nslots = (size + 7) / 8;
+
+  if (((*gr) + ainfo->nslots) <= 6) {
+    /* Fits entirely in registers */
+    ainfo->nregs = ainfo->nslots;
+    (*gr) += ainfo->nregs;
+    return;
+  }
+
+  ainfo->nregs = 6 - (*gr);
+  (*gr) = 6;
+  (*stack_size) += (ainfo->nslots - ainfo->nregs) * 8;
 
-  NOT_IMPLEMENTED("add_valuetype: more");
 }
 
 // This function is called from mono_arch_call_opcode and
@@ -237,7 +265,7 @@ add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
 // that will be used in calls
 static void
 add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, MonoInst *arg,
-                               ArgStorage storage, int reg, MonoInst *tree)
+               ArgStorage storage, int reg, MonoInst *tree)
 {
   switch (storage)
     {
@@ -245,21 +273,21 @@ add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, MonoInst *arg,
       arg->opcode = OP_OUTARG_REG;
       arg->inst_left = tree;
       arg->inst_right = (MonoInst*)call;
-      arg->unused = reg;
+      arg->backend.reg3 = reg;
       call->used_iregs |= 1 << reg;
       break;
-    case ArgInFloatSSEReg:
+    case ArgInFloatReg:
       arg->opcode = OP_OUTARG_FREG;
       arg->inst_left = tree;
       arg->inst_right = (MonoInst*)call;
-      arg->unused = reg;
+      arg->backend.reg3 = reg;
       call->used_fregs |= 1 << reg;
       break;
-    case ArgInDoubleSSEReg:
+    case ArgInDoubleReg:
       arg->opcode = OP_OUTARG_FREG;
       arg->inst_left = tree;
       arg->inst_right = (MonoInst*)call;
-      arg->unused = reg;
+      arg->backend.reg3 = reg;
       call->used_fregs |= 1 << reg;
       break;
     default:
@@ -366,7 +394,7 @@ mono_arch_create_vars (MonoCompile *cfg)
    
   sig = mono_method_signature (cfg->method);
    
-  cinfo = get_call_info (sig, FALSE);
+  cinfo = get_call_info (cfg->generic_sharing_context, sig, FALSE);
    
   if (cinfo->ret.storage == ArgValuetypeInReg)
     cfg->ret_var_is_local = TRUE;
@@ -417,6 +445,8 @@ static void
   MonoInst *ins, *last_ins = NULL;
   ins = bb->code;
    
+  CFG_DEBUG(3) g_print ("ALPHA: PEEPHOLE pass\n");
+
   while (ins) 
     {  
       switch (ins->opcode) 
@@ -477,6 +507,7 @@ static void
          break;
 
        case OP_LOADI8_MEMBASE:
+       case OP_LOAD_MEMBASE:
           /*
            * Note: if reg1 = reg2 the load op is removed
            *
@@ -486,7 +517,9 @@ static void
            * OP_STOREI8_MEMBASE_REG reg1, offset(basereg)
            * OP_MOVE reg1, reg2
            */
-          if (last_ins && (last_ins->opcode == OP_STOREI8_MEMBASE_REG) &&
+          if (last_ins &&
+             (last_ins->opcode == OP_STOREI8_MEMBASE_REG ||
+               last_ins->opcode == OP_STORE_MEMBASE_REG) &&
               ins->inst_basereg == last_ins->inst_destbasereg &&
               ins->inst_offset == last_ins->inst_offset)
             {
@@ -921,10 +954,8 @@ static void
    
    ins = bb->code;
    
-   if (bb->max_ireg > cfg->rs->next_vireg)
-        cfg->rs->next_vireg = bb->max_ireg;
-   if (bb->max_freg > cfg->rs->next_vfreg)
-        cfg->rs->next_vfreg = bb->max_freg;
+   if (bb->max_vreg > cfg->rs->next_vreg)
+        cfg->rs->next_vreg = bb->max_vreg;
    
    /*
     * FIXME: Need to add more instructions, but the current machine
@@ -1019,22 +1050,6 @@ static void
 
            break;
 
-          /*
-            case OP_LOAD_MEMBASE:
-            case OP_LOADI8_MEMBASE:
-            if (!amd64_is_imm32 (ins->inst_offset)) 
-            {
-            
-            NEW_INS (cfg, temp, OP_I8CONST);
-            temp->inst_c0 = ins->inst_offset;
-            temp->dreg = mono_regstate_next_int (cfg->rs);
-            ins->opcode = OP_AMD64_LOADI8_MEMINDEX;
-            ins->inst_indexreg = temp->dreg;
-            }
-                        
-            break;
-          */
-
         case OP_STORE_MEMBASE_IMM:
         case OP_STOREI8_MEMBASE_IMM:
           if (ins->inst_imm != 0) 
@@ -1060,6 +1075,7 @@ static void
           break;
 
          case OP_STOREI1_MEMBASE_IMM:
+          if (ins->inst_imm != 0 || !bwx_supported)
              {
                MonoInst *temp;
                NEW_INS (cfg, temp, OP_ICONST);
@@ -1071,6 +1087,7 @@ static void
            break;
 
          case OP_STOREI2_MEMBASE_IMM:
+           if (ins->inst_imm != 0 || !bwx_supported)
           {
             MonoInst *temp;
             NEW_INS (cfg, temp, OP_ICONST);
@@ -1170,6 +1187,19 @@ static void
               ins->sreg2 = temp->dreg;
               ins->opcode = OP_LSHR;
             }
+          break;
+         case OP_LSHL_IMM:
+           if (!alpha_is_imm(ins->inst_imm))
+             {
+               MonoInst *temp;
+               NEW_INS(cfg, temp, OP_ICONST);
+               temp->inst_c0 = ins->inst_imm;
+               temp->dreg = mono_regstate_next_int(cfg->rs);
+               ins->sreg2 = temp->dreg;
+               ins->opcode = OP_LSHL;
+             }
+           break;
+
         default:
           break;
         }
@@ -1180,8 +1210,7 @@ static void
    
    bb->last_ins = last_ins;
    
-   bb->max_ireg = cfg->rs->next_vireg;
-   bb->max_freg = cfg->rs->next_vfreg;
+   bb->max_vreg = cfg->rs->next_vreg;
 }
 
 /*------------------------------------------------------------------*/
@@ -1312,7 +1341,7 @@ emit_load_volatile_arguments (MonoCompile *cfg, unsigned int *code)
 
   sig = mono_method_signature (method);
 
-  cinfo = get_call_info (sig, FALSE);
+  cinfo = get_call_info (cfg->generic_sharing_context, sig, FALSE);
 
   if (sig->ret->type != MONO_TYPE_VOID) {
     if ((cinfo->ret.storage == ArgInIReg) &&
@@ -1326,7 +1355,7 @@ emit_load_volatile_arguments (MonoCompile *cfg, unsigned int *code)
   for (i = 0; i < sig->param_count + sig->hasthis; ++i)
     {
       ArgInfo *ainfo = &cinfo->args [i];
-      MonoInst *inst = cfg->varinfo [i];
+      MonoInst *inst = cfg->args [i];
 
       switch(ainfo->storage)
        {
@@ -1344,8 +1373,8 @@ emit_load_volatile_arguments (MonoCompile *cfg, unsigned int *code)
          }
          //}
          break;
-       case ArgInDoubleSSEReg:
-       case ArgInFloatSSEReg:
+       case ArgInDoubleReg:
+       case ArgInFloatReg:
          // We need to save all used af0-af5 params
          //for (i=0; i<PARAM_REGS; i++)
          //  {
@@ -1353,11 +1382,11 @@ emit_load_volatile_arguments (MonoCompile *cfg, unsigned int *code)
          {
            switch(cinfo->args[i].storage)
              {
-             case ArgInFloatSSEReg:
+             case ArgInFloatReg:
                //alpha_sts(code, ainfo->reg, alpha_fp, offset);
                alpha_lds(code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
                break;
-             case ArgInDoubleSSEReg:
+             case ArgInDoubleReg:
                //alpha_stt(code, ainfo->reg, alpha_fp, offset);
                alpha_ldt(code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
                break;
@@ -1453,12 +1482,25 @@ mono_arch_emit_prolog (MonoCompile *cfg)
        // Save method
        alpha_stq(code, alpha_pv, alpha_fp,
                  (lmf_offset + G_STRUCT_OFFSET(MonoLMF, method)));
-
      }
+
+   /* Save (global) regs */
+   offset = cfg->arch.reg_save_area_offset;
+
+   for (i = 0; i < MONO_MAX_IREGS; ++i)
+     if (ALPHA_IS_CALLEE_SAVED_REG (i) &&
+         (cfg->used_int_regs & (1 << i)) &&
+         !( ALPHA_ARGS_REGS & (1 << i)) )
+       {
+         alpha_stq(code, i, alpha_fp, offset);
+         CFG_DEBUG(3) g_print("ALPHA: Saved caller reg %d at offset: %0x\n",
+                             i, offset);
+         offset += 8;
+       }
    
    offset = cfg->arch.args_save_area_offset;
 
-   cinfo = get_call_info (sig, FALSE);
+   cinfo = get_call_info (cfg->generic_sharing_context, sig, FALSE);
 
    if (sig->ret->type != MONO_TYPE_VOID)
      {
@@ -1475,40 +1517,55 @@ mono_arch_emit_prolog (MonoCompile *cfg)
    for (i = 0; i < sig->param_count + sig->hasthis; ++i)
      {
        ArgInfo *ainfo = &cinfo->args [i];
-       MonoInst *inst = cfg->varinfo [i];
+       MonoInst *inst = cfg->args [i];
+       int j;
 
        switch(ainfo->storage)
         {
         case ArgInIReg:
           // We need to save all used a0-a5 params
-          //for (i=0; i<PARAM_REGS; i++)
-          //  {
-          //    if (i < cinfo->reg_usage)
           {
-            //alpha_stq(code, ainfo->reg, alpha_fp, offset);
-            alpha_stq(code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
+            if (inst->opcode == OP_REGVAR)
+              {
+                alpha_mov1(code, ainfo->reg, inst->dreg);
+                CFG_DEBUG(3) g_print("ALPHA: Saved int arg reg %d in reg %d\n",
+                                     ainfo->reg, inst->dreg);
+              }
+            else
+              {
+                alpha_stq(code, ainfo->reg, inst->inst_basereg,
+                          inst->inst_offset);
                   
-            CFG_DEBUG(3) g_print("ALPHA: Saved int arg reg %d at offset: %0lx\n",
-                                 ainfo->reg, inst->inst_offset/*offset*/);
+                CFG_DEBUG(3) g_print("ALPHA: Saved int arg reg %d at offset: %0lx\n",
+                                     ainfo->reg, inst->inst_offset);
                   
-            offset += 8;
+                offset += 8;
+              }
+          }
+          break;
+        case ArgAggregate:
+          {
+            for(j=0; j<ainfo->nregs; j++)
+              {
+                CFG_DEBUG(3) g_print("ALPHA: Saved aggregate arg reg %d at offset: %0lx\n",
+                                     ainfo->reg + j, inst->inst_offset + (8*j));
+                alpha_stq(code, (ainfo->reg+j), inst->inst_basereg,
+                          (inst->inst_offset + (8*j)));
+                offset += 8;
+              }
           }
-          //}
           break;
-        case ArgInDoubleSSEReg:
-        case ArgInFloatSSEReg:
+        case ArgInDoubleReg:
+        case ArgInFloatReg:
           // We need to save all used af0-af5 params
-          //for (i=0; i<PARAM_REGS; i++)
-          //  {
-          //    if (i < cinfo->freg_usage)
           {
             switch(cinfo->args[i].storage)
               {
-              case ArgInFloatSSEReg:
+              case ArgInFloatReg:
                 //alpha_sts(code, ainfo->reg, alpha_fp, offset);
                 alpha_sts(code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
                 break;
-              case ArgInDoubleSSEReg:
+              case ArgInDoubleReg:
                 //alpha_stt(code, ainfo->reg, alpha_fp, offset);
                 alpha_stt(code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
                 break;
@@ -1525,8 +1582,9 @@ mono_arch_emit_prolog (MonoCompile *cfg)
      }
 
    offset = cfg->arch.reg_save_area_offset;
-   
-   for (i = 0; i < MONO_MAX_IREGS; ++i)
+
+   /*   
+   for (i = 0; i < MONO_MAX_VREGS; ++i)
      if (ALPHA_IS_CALLEE_SAVED_REG (i) &&
         (cfg->used_int_regs & (1 << i)) &&
         !( ALPHA_ARGS_REGS & (1 << i)) )
@@ -1536,7 +1594,7 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                i, offset);
         offset += 8;
        }
-
+   */
    // TODO - check amd64 code for "Might need to attach the thread to the JIT"
 
    if (method->save_lmf)
@@ -1612,10 +1670,18 @@ mono_arch_flush_register_windows (void)
 guint32
 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
 {
+  MonoInst *ins = cfg->varinfo [vmv->idx];
+
    /* FIXME: */
   CFG_DEBUG(2) ALPHA_DEBUG("mono_arch_regalloc_cost");
 
-  return 2;
+  if (cfg->method->save_lmf)
+    /* The register is already saved */
+    /* subtract 1 for the invisible store in the prolog */
+    return (ins->opcode == OP_ARG) ? 1 : 0;
+  else
+    /* push+pop */
+    return (ins->opcode == OP_ARG) ? 2 : 1;
 }
 
 /*========================= End of Function ========================*/
@@ -1676,7 +1742,7 @@ mono_arch_get_argument_info (MonoMethodSignature *csig,
                              MonoJitArgumentInfo *arg_info)
 {
   int k;
-  CallInfo *cinfo = get_call_info (csig, FALSE);
+  CallInfo *cinfo = get_call_info (NULL, csig, FALSE);
   guint32 args_size = cinfo->stack_usage;
 
   ALPHA_DEBUG("mono_arch_get_argument_info");
@@ -1714,11 +1780,11 @@ void
 mono_arch_emit_epilog (MonoCompile *cfg)
 {
   MonoMethod *method = cfg->method;
-  int quad, offset, i;
+  int offset, i;
   unsigned int *code;
   int max_epilog_size = 128;
   int stack_size = cfg->arch.stack_size;
-  CallInfo *cinfo;
+  //  CallInfo *cinfo;
   gint32 lmf_offset = cfg->arch.lmf_offset;
   
   CFG_DEBUG(2) ALPHA_DEBUG("mono_arch_emit_epilog");
@@ -2190,7 +2256,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
      {
        offset = ((char *)code) - ((char *)cfg->native_code);
          
-       max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
+       max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
          
        if (offset > (cfg->code_size - max_len - 16))
         {
@@ -2211,15 +2277,22 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
           // Shift 64 bit value right
           CFG_DEBUG(4) g_print("ALPHA_CHECK: [long_shr] dreg=%d, sreg1=%d, sreg2=%d\n",
                  ins->dreg, ins->sreg1, ins->sreg2);
-          alpha_srl(code, ins->sreg1, ins->sreg2, ins->dreg);
+          alpha_sra(code, ins->sreg1, ins->sreg2, ins->dreg);
           break;
 
+       case OP_LSHR_UN:
+          // Shift 64 bit value right
+          CFG_DEBUG(4) g_print("ALPHA_CHECK: [long_shr_un] dreg=%d, sreg1=%d, sreg2=%d\n",
+                  ins->dreg, ins->sreg1, ins->sreg2);
+           alpha_srl(code, ins->sreg1, ins->sreg2, ins->dreg);
+           break;
+
         case OP_LSHR_IMM:
           // Shift 64 bit value right by constant
           g_assert(alpha_is_imm(ins->inst_imm));
-          CFG_DEBUG(4) g_print("ALPHA_CHECK: [long_shr] dreg=%d, sreg1=%d, const=%ld\n",
+          CFG_DEBUG(4) g_print("ALPHA_CHECK: [long_shr_imm] dreg=%d, sreg1=%d, const=%ld\n",
                  ins->dreg, ins->sreg1, ins->inst_imm);
-          alpha_srl_(code, ins->sreg1, ins->inst_imm, ins->dreg);
+          alpha_sra_(code, ins->sreg1, ins->inst_imm, ins->dreg);
           break;
 
         case OP_ISHL:
@@ -2227,6 +2300,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
            CFG_DEBUG(4) g_print("ALPHA_CHECK: [int_shl] dreg=%d, sreg1=%d, sreg2=%d\n",
                   ins->dreg, ins->sreg1, ins->sreg2);
            alpha_sll(code, ins->sreg1, ins->sreg2, ins->dreg);
+          alpha_addl_(code, ins->dreg, 0, ins->dreg);
            break;
 
         case OP_ISHL_IMM:
@@ -2235,6 +2309,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
            CFG_DEBUG(4) g_print("ALPHA_CHECK: [int_shl_imm] dreg=%d, sreg1=%d, const=%ld\n",
                   ins->dreg, ins->sreg1, ins->inst_imm);
            alpha_sll_(code, ins->sreg1, ins->inst_imm, ins->dreg);
+          alpha_addl_(code, ins->dreg, 0, ins->dreg);
            break;
 
         case OP_SHL_IMM:
@@ -2244,6 +2319,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
            alpha_sll_(code, ins->sreg1, ins->inst_imm, ins->dreg);
            break;
 
+         case OP_LSHL_IMM:
+           g_assert(alpha_is_imm(ins->inst_imm));
+           CFG_DEBUG(4) g_print("ALPHA_CHECK: [long_shl_imm] dreg=%d, sreg1=%d, const=%ld\n",
+                  ins->dreg, ins->sreg1, ins->inst_imm);
+           alpha_sll_(code, ins->sreg1, ins->inst_imm, ins->dreg);
+           break;
+
+
         case CEE_SHL:
            // Shift 32 bit value left
            CFG_DEBUG(4) g_print("ALPHA_CHECK: [shl] dreg=%d, sreg1=%d, sreg2=%d\n",
@@ -2251,6 +2334,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
            alpha_sll(code, ins->sreg1, ins->sreg2, ins->dreg);
            break;
 
+         case OP_LSHL:
+           // Shift 64 bit value left
+           CFG_DEBUG(4) g_print("ALPHA_CHECK: [long_shl] dreg=%d, sreg1=%d, sreg2=%d\n",
+                  ins->dreg, ins->sreg1, ins->sreg2);
+           alpha_sll(code, ins->sreg1, ins->sreg2, ins->dreg);
+           break;
+
 
          case OP_ISHR:
            // Shift 32 bit value right
@@ -2273,17 +2363,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
            // Shift 32 bit unsigned value right
            CFG_DEBUG(4) g_print("ALPHA_CHECK: [int_shr_un] dreg=%d, sreg1=%d, sreg2=%d\n",
                   ins->dreg, ins->sreg1, ins->sreg2);
-          alpha_zap_(code, ins->sreg1, 0xF0, ins->dreg);
-           alpha_srl(code, ins->dreg, ins->sreg2, ins->dreg);
+          alpha_zap_(code, ins->sreg1, 0xF0, alpha_at /*ins->dreg*/);
+           alpha_srl(code, alpha_at /*ins->dreg*/, ins->sreg2, ins->dreg);
            break;
 
          case OP_ISHR_UN_IMM:
            // Shift 32 bit unassigned value rigth by constant
            g_assert(alpha_is_imm(ins->inst_imm));
            CFG_DEBUG(4) g_print("ALPHA_CHECK: [int_shr_un_imm] dreg=%d, sreg1=%d, const=%ld\n",
-                  ins->dreg, ins->sreg1, ins->inst_imm);
-          alpha_zap_(code, ins->sreg1, 0xF0, ins->dreg);
-           alpha_srl_(code, ins->dreg, ins->inst_imm, ins->dreg);
+                               ins->dreg, ins->sreg1, ins->inst_imm);
+          alpha_zap_(code, ins->sreg1, 0xF0, alpha_at /*ins->dreg*/);
+           alpha_srl_(code, alpha_at /*ins->dreg*/, ins->inst_imm, ins->dreg);
            break;
 
          case OP_LSHR_UN_IMM:
@@ -2680,10 +2770,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
           // Load unassigned byte from REGOFFSET
           CFG_DEBUG(4) g_print("ALPHA_CHECK: [loadu1_membase] dreg=%d, basereg=%d, offset=%0lx\n",
                  ins->dreg, ins->inst_basereg, ins->inst_offset);
-
-          alpha_ldq_u(code, alpha_r25, ins->inst_basereg, ins->inst_offset);
-          alpha_lda(code, alpha_at, ins->inst_basereg, ins->inst_offset);
-          alpha_extbl(code, alpha_r25, alpha_at, ins->dreg);
+          if (bwx_supported)
+            alpha_ldbu(code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+          else
+            {
+              alpha_ldq_u(code, alpha_r25, ins->inst_basereg,
+                          ins->inst_offset);
+              alpha_lda(code, alpha_at, ins->inst_basereg, ins->inst_offset);
+              alpha_extbl(code, alpha_r25, alpha_at, ins->dreg);
+            }
           break;
           
         case OP_LOADU2_MEMBASE:
@@ -2691,14 +2786,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
           CFG_DEBUG(4) g_print("ALPHA_CHECK: [loadu2_membase] dreg=%d, basereg=%d, offset=%0lx\n",
                  ins->dreg, ins->inst_basereg, ins->inst_offset);
 
-           alpha_ldq_u(code, alpha_r24, ins->inst_basereg, ins->inst_offset);
-          alpha_ldq_u(code, alpha_r25, ins->inst_basereg,
-                      (ins->inst_offset+1));
-           alpha_lda(code, alpha_at, ins->inst_basereg, ins->inst_offset);
-           alpha_extwl(code, alpha_r24, alpha_at, ins->dreg);
-          alpha_extwh(code, alpha_r25, alpha_at, alpha_r25);
-          alpha_bis(code, alpha_r25, ins->dreg, ins->dreg);
-
+          if (bwx_supported)
+            alpha_ldwu(code, ins->dreg, ins->inst_basereg, ins->inst_offset);
+          else
+            {
+              alpha_ldq_u(code, alpha_r24, ins->inst_basereg,
+                          ins->inst_offset);
+              alpha_ldq_u(code, alpha_r25, ins->inst_basereg,
+                          (ins->inst_offset+1));
+              alpha_lda(code, alpha_at, ins->inst_basereg, ins->inst_offset);
+              alpha_extwl(code, alpha_r24, alpha_at, ins->dreg);
+              alpha_extwh(code, alpha_r25, alpha_at, alpha_r25);
+              alpha_bis(code, alpha_r25, ins->dreg, ins->dreg);
+            }
           break;
           
         case OP_LOAD_MEMBASE:
@@ -2723,71 +2823,104 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
           // Load sign-extended byte from REGOFFSET
           CFG_DEBUG(4) g_print("ALPHA_CHECK: [loadi1_membase] dreg=%d, basereg=%d, offset=%0lx\n",
                  ins->dreg, ins->inst_basereg, ins->inst_offset);
-          alpha_ldq_u(code, alpha_r25, ins->inst_basereg, ins->inst_offset);
-          alpha_lda(code, alpha_at, ins->inst_basereg, (ins->inst_offset+1));
-          alpha_extqh(code, alpha_r25, alpha_at, ins->dreg);
-          alpha_sra_(code, ins->dreg, 56, ins->dreg);
+          if (bwx_supported)
+            {
+              alpha_ldbu(code, ins->dreg, ins->inst_basereg,
+                         ins->inst_offset);
+              alpha_sextb(code, ins->dreg, ins->dreg);
+            }
+          else
+            {
+              alpha_ldq_u(code, alpha_r25, ins->inst_basereg,
+                          ins->inst_offset);
+              alpha_lda(code, alpha_at, ins->inst_basereg,
+                        (ins->inst_offset+1));
+              alpha_extqh(code, alpha_r25, alpha_at, ins->dreg);
+              alpha_sra_(code, ins->dreg, 56, ins->dreg);
+            }
           break;
           
         case OP_LOADI2_MEMBASE:
           // Load sign-extended word from REGOFFSET
           CFG_DEBUG(4) g_print("ALPHA_CHECK: [loadi2_membase] dreg=%d, basereg=%d, offset=%0lx\n",
                  ins->dreg, ins->inst_basereg, ins->inst_offset);
-           alpha_ldq_u(code, alpha_r24, ins->inst_basereg, ins->inst_offset);
-           alpha_ldq_u(code, alpha_r25, ins->inst_basereg,
-                      (ins->inst_offset+1));
-           alpha_lda(code, alpha_at, ins->inst_basereg, (ins->inst_offset+2));
-           alpha_extql(code, alpha_r24, alpha_at, ins->dreg);
-           alpha_extqh(code, alpha_r25, alpha_at, alpha_r25);
-           alpha_bis(code, alpha_r25, ins->dreg, ins->dreg);
-          alpha_sra_(code, ins->dreg, 48, ins->dreg);
-          
+          if (bwx_supported)
+            {
+              alpha_ldwu(code, ins->dreg, ins->inst_basereg,
+                         ins->inst_offset);
+              alpha_sextw(code, ins->dreg, ins->dreg);
+            }
+          else
+            {
+              alpha_ldq_u(code, alpha_r24, ins->inst_basereg,
+                          ins->inst_offset);
+              alpha_ldq_u(code, alpha_r25, ins->inst_basereg,
+                          (ins->inst_offset+1));
+              alpha_lda(code, alpha_at, ins->inst_basereg,
+                        (ins->inst_offset+2));
+              alpha_extql(code, alpha_r24, alpha_at, ins->dreg);
+              alpha_extqh(code, alpha_r25, alpha_at, alpha_r25);
+              alpha_bis(code, alpha_r25, ins->dreg, ins->dreg);
+              alpha_sra_(code, ins->dreg, 48, ins->dreg);
+            }
           break;                        
           
         case OP_STOREI1_MEMBASE_IMM:
           // Store signed byte at REGOFFSET
-          // For now storei1_membase_reg will do the work
-          g_assert_not_reached();
-          /*
-          printf("ALPHA_TODO: [storei1_membase_imm] const=%0lx, destbasereg=%d, offset=%0lx\n",
+          // Valid only for storing 0
+          // storei1_membase_reg will do the rest
+          
+          CFG_DEBUG(4) g_print("ALPHA_CHECK: [storei1_membase_imm(0)] const=%0lx, destbasereg=%d, offset=%0lx\n",
                  ins->inst_imm, ins->inst_destbasereg, ins->inst_offset);
-          g_assert(alpha_is_imm(ins->inst_imm));
+          g_assert(ins->inst_imm == 0);
 
-          alpha_lda(code, alpha_r25, alpha_zero, ins->inst_imm);
+          if (bwx_supported)
+               alpha_stb(code, alpha_zero, ins->inst_destbasereg,
+                       ins->inst_offset);
+          else
+               g_assert_not_reached();
 
-          alpha_lda(code, alpha_at, ins->inst_destbasereg, ins->inst_offset);
-          alpha_ldq_u(code, alpha_r24, ins->inst_destbasereg, ins->inst_offset);
-          alpha_insbl(code, alpha_r25, alpha_at, alpha_r23);
-          alpha_mskbl(code, alpha_r24, alpha_at, alpha_r24);
-          alpha_bis(code, alpha_r24, alpha_r23, alpha_r24);
-          alpha_stq_u(code, alpha_r24, ins->inst_destbasereg, ins->inst_offset);
-          */
           break;
 
         case OP_STOREI1_MEMBASE_REG:
           // Store byte at REGOFFSET
           CFG_DEBUG(4) g_print("ALPHA_CHECK: [storei1_membase_reg] sreg1=%d, destbasereg=%d, offset=%0lx\n",
                  ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
-
-           alpha_lda(code, alpha_at, ins->inst_destbasereg, ins->inst_offset);
-           alpha_ldq_u(code, alpha_r25, ins->inst_destbasereg,
-                      ins->inst_offset);
-           alpha_insbl(code, ins->sreg1, alpha_at, alpha_r24);
-           alpha_mskbl(code, alpha_r25, alpha_at, alpha_r25);
-           alpha_bis(code, alpha_r25, alpha_r24, alpha_r25);
-           alpha_stq_u(code, alpha_r25, ins->inst_destbasereg,
-                      ins->inst_offset);
-
+          if (bwx_supported)
+            {
+              alpha_stb(code, ins->sreg1, ins->inst_destbasereg,
+                        ins->inst_offset);
+            }
+          else
+            {
+              alpha_lda(code, alpha_at, ins->inst_destbasereg,
+                        ins->inst_offset);
+              alpha_ldq_u(code, alpha_r25, ins->inst_destbasereg,
+                          ins->inst_offset);
+              alpha_insbl(code, ins->sreg1, alpha_at, alpha_r24);
+              alpha_mskbl(code, alpha_r25, alpha_at, alpha_r25);
+              alpha_bis(code, alpha_r25, alpha_r24, alpha_r25);
+              alpha_stq_u(code, alpha_r25, ins->inst_destbasereg,
+                          ins->inst_offset);
+            }
           break;
           
         case OP_STOREI2_MEMBASE_IMM:
            // Store signed word at REGOFFSET
+          // Now work only for storing 0
            // For now storei2_membase_reg will do the work
-           g_assert_not_reached();
-          /*
-          printf("ALPHA_TODO: [storei2_membase_imm] const=%0lx, destbasereg=%d, offset=%0lx\n",
+          
+          CFG_DEBUG(4) g_print("ALPHA_CHECK: [storei2_membase_imm(0)] const=%0lx, destbasereg=%d, offset=%0lx\n",
                  ins->inst_imm, ins->inst_destbasereg, ins->inst_offset);
-          */
+          
+          g_assert(ins->inst_imm == 0);
+          
+          if (bwx_supported)
+               alpha_stw(code, alpha_zero, ins->inst_destbasereg,
+                       ins->inst_offset);
+          else
+               g_assert_not_reached();
+
           break;
           
         case OP_STOREI2_MEMBASE_REG:
@@ -2795,21 +2928,30 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
            CFG_DEBUG(4) g_print("ALPHA_CHECK: [storei2_membase_reg] sreg1=%d, destbasereg=%d, offset=%0lx\n",
                   ins->sreg1, ins->inst_destbasereg, ins->inst_offset);
           
-          alpha_lda(code, alpha_at, ins->inst_destbasereg, ins->inst_offset);
-          alpha_ldq_u(code, alpha_r25, ins->inst_destbasereg,
-                      (ins->inst_offset+1));
-          alpha_ldq_u(code, alpha_r24, ins->inst_destbasereg,
-                      ins->inst_offset);
-          alpha_inswh(code, ins->sreg1, alpha_at, alpha_r23);
-          alpha_inswl(code, ins->sreg1, alpha_at, alpha_r22);
-          alpha_mskwh(code, alpha_r25, alpha_at, alpha_r25);
-          alpha_mskwl(code, alpha_r24, alpha_at, alpha_r24);
-          alpha_bis(code, alpha_r25, alpha_r23, alpha_r25);
-          alpha_bis(code, alpha_r24, alpha_r22, alpha_r24);
-          alpha_stq_u(code, alpha_r25, ins->inst_destbasereg,
-                      (ins->inst_offset+1));
-          alpha_stq_u(code, alpha_r24, ins->inst_destbasereg,
-                       ins->inst_offset);
+          if (bwx_supported)
+            {
+              alpha_stw(code, ins->sreg1, ins->inst_destbasereg,
+                        ins->inst_offset);
+            }
+          else
+            {
+              alpha_lda(code, alpha_at, ins->inst_destbasereg,
+                        ins->inst_offset);
+              alpha_ldq_u(code, alpha_r25, ins->inst_destbasereg,
+                          (ins->inst_offset+1));
+              alpha_ldq_u(code, alpha_r24, ins->inst_destbasereg,
+                          ins->inst_offset);
+              alpha_inswh(code, ins->sreg1, alpha_at, alpha_r23);
+              alpha_inswl(code, ins->sreg1, alpha_at, alpha_r22);
+              alpha_mskwh(code, alpha_r25, alpha_at, alpha_r25);
+              alpha_mskwl(code, alpha_r24, alpha_at, alpha_r24);
+              alpha_bis(code, alpha_r25, alpha_r23, alpha_r25);
+              alpha_bis(code, alpha_r24, alpha_r22, alpha_r24);
+              alpha_stq_u(code, alpha_r25, ins->inst_destbasereg,
+                          (ins->inst_offset+1));
+              alpha_stq_u(code, alpha_r24, ins->inst_destbasereg,
+                          ins->inst_offset);
+            }
 
           break;
           
@@ -2859,14 +3001,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
           // Later check different rounding and exc modes
           CFG_DEBUG(4) g_print("ALPHA_CHECK: [float_add] sreg1=%d, sreg2=%d, dreg=%d\n",
                  ins->sreg1, ins->sreg2, ins->dreg);
-          alpha_addt(code, ins->sreg1, ins->sreg2, ins->dreg);
+          alpha_addt_su(code, ins->sreg1, ins->sreg2, ins->dreg);
+          alpha_trapb(code);
           break;
 
         case OP_FSUB:
           // Later check different rounding and exc modes
           CFG_DEBUG(4) g_print("ALPHA_CHECK: [float_sub] sreg1=%d, sreg2=%d, dreg=%d\n",
                                 ins->sreg1, ins->sreg2, ins->dreg);
-          alpha_subt(code, ins->sreg1, ins->sreg2, ins->dreg);
+          alpha_subt_su(code, ins->sreg1, ins->sreg2, ins->dreg);
+          alpha_trapb(code);
            break;
 
         case OP_FMUL:
@@ -3004,7 +3148,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
            break;
 
          case OP_ALPHA_CMP_IMM_ULE:
-           CFG_DEBUG(4) g_print("ALPHA_CHECK: [alpha_cmp_imm_ule] sreg1=%d, const=%0lX, dreg=%\d\n",
+           CFG_DEBUG(4) g_print("ALPHA_CHECK: [alpha_cmp_imm_ule] sreg1=%d, const=%0lX, dreg=%d\n",
                   ins->sreg1, ins->inst_imm, ins->dreg);
            alpha_cmpule_(code, ins->sreg1, ins->inst_imm, alpha_at);
            break;
@@ -3035,7 +3179,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
 
          case OP_ALPHA_CMP_IMM_LE:
-           CFG_DEBUG(4) g_print("ALPHA_CHECK: [alpha_cmp_imm_le] sreg1=%d, const=%0lX, dreg=%\d\n",
+           CFG_DEBUG(4) g_print("ALPHA_CHECK: [alpha_cmp_imm_le] sreg1=%d, const=%0lX, dreg=%d\n",
                   ins->sreg1, ins->inst_imm, ins->dreg);
            alpha_cmple_(code, ins->sreg1, ins->inst_imm, alpha_at);
            break;
@@ -3135,18 +3279,28 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
           // Read about sextb
           CFG_DEBUG(4) g_print("ALPHA_CHECK: [conv_i1] sreg=%d, dreg=%d\n",
                  ins->sreg1, ins->dreg);
-           alpha_sll_(code, ins->sreg1, 24, alpha_at);
-           alpha_addl(code, alpha_at, alpha_zero, ins->dreg);
-           alpha_sra_(code, ins->dreg, 24, ins->dreg);
+          if (bwx_supported)
+            alpha_sextb(code, ins->sreg1, ins->dreg);
+          else
+            {
+              alpha_sll_(code, ins->sreg1, 24, alpha_at);
+              alpha_addl(code, alpha_at, alpha_zero, ins->dreg);
+              alpha_sra_(code, ins->dreg, 24, ins->dreg);
+            }
           break;
 
         case CEE_CONV_I2:
           // Move I2 (word) to dreg(64 bits) and sign extend it
           CFG_DEBUG(4) g_print("ALPHA_CHECK: [conv_i2] sreg=%d, dreg=%d\n",
                  ins->sreg1, ins->dreg);
-          alpha_sll_(code, ins->sreg1, 16, alpha_at);
-          alpha_addl(code, alpha_at, alpha_zero, ins->dreg);
-          alpha_sra_(code, ins->dreg, 16, ins->dreg);
+          if (bwx_supported)
+            alpha_sextw(code, ins->sreg1, ins->dreg);
+          else
+            {
+              alpha_sll_(code, ins->sreg1, 16, alpha_at);
+              alpha_addl(code, alpha_at, alpha_zero, ins->dreg);
+              alpha_sra_(code, ins->dreg, 16, ins->dreg);
+            }
           break;
           
         case CEE_CONV_I4:
@@ -3271,6 +3425,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
           alpha_ldt(code, ins->dreg, alpha_sp, 0);
           alpha_lda(code, alpha_sp, alpha_sp, 8);
           alpha_cvtqs(code, ins->dreg, ins->dreg);
+          alpha_trapb(code);
           break;
 
          case CEE_CONV_R8:
@@ -3283,26 +3438,50 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
            alpha_ldt(code, ins->dreg, alpha_sp, 0);
            alpha_lda(code, alpha_sp, alpha_sp, 8);
            alpha_cvtqt(code, ins->dreg, ins->dreg);
+          alpha_trapb(code);
            break;
 
         case OP_FCONV_TO_R4:
           // Convert 64 bit float to 32 bit float (T -> S)
           CFG_DEBUG(4) g_print("ALPHA_CHECK: [fconv_r4] sreg=%d, dreg=%d\n",
                                 ins->sreg1, ins->dreg);
-          alpha_cvtts(code, ins->sreg1, ins->dreg);
+          alpha_cvtts_su(code, ins->sreg1, ins->dreg);
+          alpha_trapb(code);
           break;
 
         case OP_LOCALLOC:
           // Allocate sreg1 bytes on stack, round bytes by 8,
           // modify SP, set dreg to end of current stack frame
           // top of stack is used for call params
-          CFG_DEBUG(4) g_print("ALPHA_FIX: [localloc] sreg=%d, dreg=%d\n",
+          CFG_DEBUG(4) g_print("ALPHA_CHECK: [localloc] sreg=%d, dreg=%d\n",
                                ins->sreg1, ins->dreg);
-          alpha_addq_(code, ins->sreg1, (MONO_ARCH_FRAME_ALIGNMENT - 1), ins->sreg1);
-          alpha_and_(code, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1), ins->sreg1);
+
+          alpha_addq_(code, ins->sreg1, (MONO_ARCH_LOCALLOC_ALIGNMENT - 1), ins->sreg1);
+          alpha_bic_(code, ins->sreg1, (MONO_ARCH_LOCALLOC_ALIGNMENT - 1), ins->sreg1);
+          if (ins->flags & MONO_INST_INIT)
+              alpha_mov1(code, ins->sreg1, ins->sreg2);
+
           alpha_subq(code, alpha_sp, ins->sreg1, alpha_sp);
-          alpha_lda(code, ins->dreg, alpha_zero, (cfg->arch.params_stack_size));
-          alpha_addq(code, alpha_sp, ins->dreg, ins->dreg);
+          if (cfg->arch.params_stack_size > 0)
+          {
+              alpha_lda(code, ins->dreg, alpha_zero,
+                       (cfg->arch.params_stack_size));
+              alpha_addq(code, alpha_sp, ins->dreg, ins->dreg);
+          }
+          else
+              alpha_mov1(code, alpha_sp, ins->dreg);
+
+          if (ins->flags & MONO_INST_INIT)
+          {
+               // TODO: Optimize it later
+               alpha_lda(code, ins->sreg2, ins->sreg2,
+                       -(MONO_ARCH_LOCALLOC_ALIGNMENT));
+               alpha_blt(code, ins->sreg2, 3);
+               alpha_addq(code, ins->sreg2, ins->dreg, alpha_at);
+               alpha_stq(code, alpha_zero, alpha_at, 0);
+               alpha_br(code, alpha_zero, -5);
+          }
+
           break;
 
         case OP_MOVE:
@@ -3414,6 +3593,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
            EMIT_ALPHA_BRANCH(ins, alpha_at, fbeq);
            break;
 
+        case OP_FBGE:
+          CFG_DEBUG(4) g_print("ALPHA_CHECK: [fbge] [");
+          alpha_fbne(code, (alpha_at+1), 1);
+          EMIT_ALPHA_BRANCH(ins, alpha_at, fbeq);
+           break;
+
          case OP_FBLE_UN:
            CFG_DEBUG(4) g_print("ALPHA_CHECK: [fble_un] [");
            alpha_fbeq(code, (alpha_at+1), 1);
@@ -3421,6 +3606,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
            EMIT_ALPHA_BRANCH(ins, alpha_at, fbne);
            break;
 
+         case OP_FBLE:
+           CFG_DEBUG(4) g_print("ALPHA_CHECK: [fble] [");
+           alpha_fbne(code, (alpha_at+1), 1);
+           EMIT_ALPHA_BRANCH(ins, alpha_at, fbne);
+           break;
+
          case OP_FBLT_UN:
            CFG_DEBUG(4) g_print("ALPHA_CHECK: [fblt_un] [");
            alpha_fbeq(code, (alpha_at+1), 1);
@@ -3428,6 +3619,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
            EMIT_ALPHA_BRANCH(ins, alpha_at, fbne);
            break;
 
+         case OP_FBLT:
+           CFG_DEBUG(4) g_print("ALPHA_CHECK: [fblt] [");
+           alpha_fbne(code, (alpha_at+1), 1);
+           EMIT_ALPHA_BRANCH(ins, alpha_at, fbne);
+           break;
+
          case OP_FBGT_UN:
            CFG_DEBUG(4) g_print("ALPHA_CHECK: [fbgt_un] [");
            alpha_fbeq(code, (alpha_at+1), 1);
@@ -3435,6 +3632,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
            EMIT_ALPHA_BRANCH(ins, alpha_at, fbeq);
            break;
 
+         case OP_FBGT:
+           CFG_DEBUG(4) g_print("ALPHA_CHECK: [fbgt] [");
+           alpha_fbne(code, (alpha_at+1), 1);
+           EMIT_ALPHA_BRANCH(ins, alpha_at, fbeq);
+           break;
+
         case OP_IBEQ:
           CFG_DEBUG(4) g_print("ALPHA_CHECK: [ibeq] [");
           EMIT_ALPHA_BRANCH(ins, alpha_at, beq);
@@ -3460,7 +3663,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
           ins->inst_c0 = (char *)code - (char *)cfg->native_code;
           break;
           
-        case CEE_BR:
+        case OP_BR:
           CFG_DEBUG(4) g_print("ALPHA_CHECK: [br] target: %p, next: %p, curr: %p, last: %p [",
                  ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
           
@@ -3618,7 +3821,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
             // of by call_filter. There should be difference. For now just
             // handle - call_handler
 
-            CFG_DEBUG(4) g_print("ALPHA_CHECK: [start_handler] basereg=%d, offset=%0x\n",
+            CFG_DEBUG(4) g_print("ALPHA_CHECK: [start_handler] basereg=%d, offset=%0lx\n",
                ins->inst_left->inst_basereg, ins->inst_left->inst_offset);
 
             alpha_stq(code, alpha_ra, ins->inst_left->inst_basereg, 
@@ -3626,10 +3829,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
           }
           break;
 
-        case CEE_ENDFINALLY:
-       {
+        case OP_ENDFINALLY:
+          {
              // Keep in sync with start_handler
-             CFG_DEBUG(4) g_print("ALPHA_CHECK: [endfinally] basereg=%d, offset=%0x\n",
+             CFG_DEBUG(4) g_print("ALPHA_CHECK: [endfinally] basereg=%d, offset=%0lx\n",
                 ins->inst_left->inst_basereg, ins->inst_left->inst_offset);
 
              alpha_ldq(code, alpha_ra, ins->inst_left->inst_basereg,
@@ -3637,12 +3840,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
              alpha_ret(code, alpha_ra, 1);
 
-       }
-       break;
+          }
+          break;
         case OP_ENDFILTER:
           {
             // Keep in sync with start_handler
-            CFG_DEBUG(4) g_print("ALPHA_CHECK: [endfilter] sreg1=%d, basereg=%d, offset=%0x\n",
+            CFG_DEBUG(4) g_print("ALPHA_CHECK: [endfilter] sreg1=%d, basereg=%d, offset=%0lx\n",
                ins->sreg1, ins->inst_left->inst_basereg, ins->inst_left->inst_offset);
 
             alpha_ldq(code, alpha_ra, ins->inst_left->inst_basereg,
@@ -3668,7 +3871,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                  MONO_PATCH_INFO_BB,
                                  ins->inst_target_bb);
             alpha_bsr(code, alpha_ra, 0);
-
           }
           break;
           
@@ -3678,8 +3880,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
           alpha_ret(code, alpha_ra, 1);
           break;
 
-        case CEE_THROW:
-          CFG_DEBUG(4) g_print("ALPHA_CHECK: [throw] sreg1=%0lx\n",
+        case OP_THROW:
+          CFG_DEBUG(4) g_print("ALPHA_CHECK: [throw] sreg1=%0x\n",
                                ins->sreg1);
           alpha_mov1(code, ins->sreg1, alpha_a0);
           code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
@@ -3687,26 +3889,28 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
           break;
 
          case OP_RETHROW:
-           CFG_DEBUG(4) g_print("ALPHA_CHECK: [rethrow] sreg1=%0lx\n",
+           CFG_DEBUG(4) g_print("ALPHA_CHECK: [rethrow] sreg1=%0x\n",
                                 ins->sreg1);
            alpha_mov1(code, ins->sreg1, alpha_a0);
            code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
                              (gpointer)"mono_arch_rethrow_exception");
            break;
 
-        case CEE_JMP:
+        case OP_JMP:
           {
-            CFG_DEBUG(4) g_print("ALPHA_CHECK: [jmp] %p\n", ins->inst_p0);
             /*
-             * Note: this 'frame destruction' logic is useful for tail calls, too.
-             * Keep in sync with the code in emit_epilog.
+             * Note: this 'frame destruction' logic is useful for tail calls,
+             * too. Keep in sync with the code in emit_epilog.
              */
-            int pos = 0, i, offset;
+            int offset;
             AlphaGotData ge_data;
 
+            CFG_DEBUG(4) g_print("ALPHA_CHECK: [jmp] %p\n", ins->inst_p0);
+
             /* FIXME: no tracing support... */
             if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
-              code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
+              code = mono_arch_instrument_epilog (cfg,
+                                  mono_profiler_method_leave, code, FALSE);
             g_assert (!cfg->method->save_lmf);
 
             alpha_mov1( code, alpha_fp, alpha_sp );
@@ -3733,7 +3937,56 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
           mono_add_patch_info (cfg, offset,
                                (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
           break;
-          
+
+        case OP_MEMORY_BARRIER:
+          CFG_DEBUG(4) g_print("ALPHA_CHECK: [mb]\n");
+          alpha_mb(code);
+          break;
+         
+        case OP_CKFINITE:
+          // Float register contains a value which we need to check
+          {
+               double  ni = -1.0 / 0.0;
+               double  pi = 1.0 / 0.0;
+               AlphaGotData    ge_data;
+
+               CFG_DEBUG(4) g_print("ALPHA_TODO: [chfinite] sreg1=%d\n", ins->sreg1);
+               alpha_cmptun_su(code, ins->sreg1, ins->sreg1, alpha_at);
+               alpha_trapb(code);
+               EMIT_COND_EXC_BRANCH(fbne, alpha_at, "ArithmeticException");
+
+               // Negative infinity
+               ge_data.data.d = ni;
+               add_got_entry(cfg, GT_DOUBLE, ge_data,
+                           (char *)code - (char *)cfg->native_code,
+                           MONO_PATCH_INFO_NONE, 0);
+               alpha_ldt(code, alpha_at, alpha_gp, 0);
+
+               alpha_cmpteq_su(code, ins->sreg1, alpha_at, alpha_at);
+               alpha_trapb(code);
+
+               EMIT_COND_EXC_BRANCH(fbne, alpha_at, "ArithmeticException");
+
+                // Positive infinity
+                ge_data.data.d = pi;
+                add_got_entry(cfg, GT_DOUBLE, ge_data,
+                           (char *)code - (char *)cfg->native_code,
+                           MONO_PATCH_INFO_NONE, 0);
+                alpha_ldt(code, alpha_at, alpha_gp, 0);
+
+                alpha_cmpteq_su(code, ins->sreg1, alpha_at, alpha_at);
+                alpha_trapb(code);
+
+                EMIT_COND_EXC_BRANCH(fbne, alpha_at, "ArithmeticException");
+          }
+          break;
+        case OP_FDIV:
+          CFG_DEBUG(4) g_print("ALPHA_TODO: [fdiv] dest=%d, sreg1=%d, sreg2=%d\n",
+               ins->dreg, ins->sreg1, ins->sreg2);
+          alpha_divt_su(code, ins->sreg1, ins->sreg2, ins->dreg);
+          alpha_trapb(code);
+
+          break;
         default:
           g_warning ("unknown opcode %s in %s()\n",
                      mono_inst_name (ins->opcode), __FUNCTION__);
@@ -3789,8 +4042,8 @@ mono_arch_cpu_optimizazions (guint32 *exclude_mask)
    /*----------------------------------------------------------*/
    /* no alpha-specific optimizations yet                       */
    /*----------------------------------------------------------*/
-   *exclude_mask = MONO_OPT_INLINE|MONO_OPT_LINEARS;
-   //      *exclude_mask = MONO_OPT_INLINE;
+   *exclude_mask = MONO_OPT_LINEARS;
+   //      *exclude_mask = MONO_OPT_INLINE|MONO_OPT_LINEARS;
 
    return opts;
 }
@@ -3920,7 +4173,7 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain,
 
        case MONO_PATCH_INFO_GOT_OFFSET:
          {
-           unsigned int *ip2 = ip;
+           unsigned int *ip2 = (unsigned int *)ip;
            unsigned int inst = *ip2;
            unsigned int off = patch_info->data.offset & 0xFFFFFFFF;
 
@@ -3935,7 +4188,7 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain,
        case MONO_PATCH_INFO_CLASS_INIT: 
          {               
            /* Might already been changed to a nop */
-           unsigned int* ip2 = ip;
+           unsigned int* ip2 = (unsigned int *)ip;
           unsigned long t_addr = (unsigned long)target;
  
            if (*ip2 != (t_addr & 0xFFFFFFFF) ||
@@ -4059,7 +4312,7 @@ mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst,
        int this_reg, int this_type, int vt_reg)
 {
   MonoCallInst *call = (MonoCallInst*)inst;
-  CallInfo * cinfo = get_call_info (inst->signature, FALSE);
+  CallInfo * cinfo = get_call_info (cfg->generic_sharing_context, inst->signature, FALSE);
 
   CFG_DEBUG(2) ALPHA_DEBUG("mono_arch_emit_this_vret_args");
 
@@ -4169,9 +4422,37 @@ mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
 void
 mono_arch_cpu_init (void)
 {
-   ALPHA_DEBUG("mono_arch_cpu_init");
+  unsigned long amask, implver;
+  register long v0 __asm__("$0") = -1;
+  ALPHA_DEBUG("mono_arch_cpu_init");
+
+  __asm__ (".long 0x47e00c20" : "=r"(v0) : "0"(v0));
+  amask = ~v0;
+  __asm__ (".long 0x47e03d80" : "=r"(v0));
+  implver = v0;
+
+  if (amask & 1)
+    bwx_supported = 1;
+
+  //printf("amask: %x, implver: %x", amask, implver);
+}
+
+/*
+ * Initialize architecture specific code.
+ */
+void
+mono_arch_init (void)
+{
 }
 
+/*
+ * Cleanup architecture specific code.
+ */
+void
+mono_arch_cleanup (void)
+{
+}
 
 /*
  * get_call_info:
@@ -4184,9 +4465,9 @@ mono_arch_cpu_init (void)
  * For x86 win32, see ???.
  */
 static CallInfo*
-get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
+get_call_info (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, gboolean is_pinvoke)
 {
-   guint32 i, gr, fr;
+   guint32 i, gr, fr, *pgr, *pfr;
    MonoType *ret_type;
    int n = sig->hasthis + sig->param_count;
    guint32 stack_size = 0;
@@ -4197,6 +4478,14 @@ get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
    gr = 0;
    fr = 0;
    
+   if (is_pinvoke)
+       pgr = pfr = &gr;
+   else
+   {
+       pgr = &gr;
+       pfr = &fr;
+   }
+
    /* return value */
    {
      ret_type = mono_type_get_underlying_type (sig->ret);
@@ -4227,42 +4516,49 @@ get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
        cinfo->ret.reg = alpha_r0;
        break;
      case MONO_TYPE_R4:
-       cinfo->ret.storage = ArgInFloatSSEReg;
+       cinfo->ret.storage = ArgInFloatReg;
        cinfo->ret.reg = alpha_f0;
        break;
      case MONO_TYPE_R8:
-       cinfo->ret.storage = ArgInDoubleSSEReg;
+       cinfo->ret.storage = ArgInDoubleReg;
        cinfo->ret.reg = alpha_f0;
        break;
+     case MONO_TYPE_GENERICINST:
+       if (!mono_type_generic_inst_is_valuetype (sig->ret))
+        {
+          cinfo->ret.storage = ArgInIReg;
+          cinfo->ret.reg = alpha_r0;
+          break;
+        }
+       /* Fall through */
      case MONO_TYPE_VALUETYPE:
        {
         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
                        
-        add_valuetype (sig, &cinfo->ret, sig->ret, TRUE,
+        add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE,
                        &tmp_gr, &tmp_fr, &tmp_stacksize);
         
         if (cinfo->ret.storage == ArgOnStack)
           /* The caller passes the address where the value
              is stored */
-          add_general (&gr, &stack_size, &cinfo->ret);
+          add_general (pgr, &stack_size, &cinfo->ret);
         break;
        }
      case MONO_TYPE_TYPEDBYREF:
        /* Same as a valuetype with size 24 */
-       add_general (&gr, &stack_size, &cinfo->ret);
+       add_general (pgr, &stack_size, &cinfo->ret);
        ;
        break;
      case MONO_TYPE_VOID:
        break;
      default:
-       g_error ("Can't handle as return value 0x%x", sig->ret->
-               type);
+       g_error ("Can't handle as return value 0x%x", sig->ret->type);
      }
    }
    
    /* this */
    if (sig->hasthis)
-     add_general (&gr, &stack_size, cinfo->args + 0);
+     add_general (pgr, &stack_size, cinfo->args + 0);
    
    if (!sig->pinvoke &&
           (sig->call_convention == MONO_CALL_VARARG) && (n == 0))
@@ -4272,7 +4568,7 @@ get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
                
        /* Emit the signature cookie just before the implicit arguments
        */
-       add_general (&gr, &stack_size, &cinfo->sig_cookie);
+       add_general (pgr, &stack_size, &cinfo->sig_cookie);
      }
    
    for (i = 0; i < sig->param_count; ++i)
@@ -4294,11 +4590,11 @@ get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
           fr = FLOAT_PARAM_REGS;
                         
           /* Emit the signature cookie just before the implicit arguments */
-          add_general (&gr, &stack_size, &cinfo->sig_cookie);
+          add_general (pgr, &stack_size, &cinfo->sig_cookie);
         }
                
        if (sig->params [i]->byref) {
-        add_general (&gr, &stack_size, ainfo);
+        add_general (pgr, &stack_size, ainfo);
         continue;
        }
        
@@ -4308,16 +4604,16 @@ get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
        case MONO_TYPE_BOOLEAN:
        case MONO_TYPE_I1:
        case MONO_TYPE_U1:
-        add_general (&gr, &stack_size, ainfo);
+        add_general (pgr, &stack_size, ainfo);
         break;
        case MONO_TYPE_I2:
        case MONO_TYPE_U2:
        case MONO_TYPE_CHAR:
-        add_general (&gr, &stack_size, ainfo);
+        add_general (pgr, &stack_size, ainfo);
         break;
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
-        add_general (&gr, &stack_size, ainfo);
+        add_general (pgr, &stack_size, ainfo);
         break;
        case MONO_TYPE_I:
        case MONO_TYPE_U:
@@ -4328,11 +4624,20 @@ get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
        case MONO_TYPE_STRING:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_ARRAY:
-        add_general (&gr, &stack_size, ainfo);
+        add_general (pgr, &stack_size, ainfo);
         break;
+       case MONO_TYPE_GENERICINST:
+        if (!mono_type_generic_inst_is_valuetype (sig->params [i]))
+          {
+            add_general (pgr, &stack_size, ainfo);
+            break;
+          }
+        /* Fall through */
        case MONO_TYPE_VALUETYPE:
-        add_valuetype (sig, ainfo, sig->params [i],
-                       FALSE, &gr, &fr, &stack_size);
+        /* FIXME: */
+        /* We always pass valuetypes on the stack */
+        add_valuetype (gsctx, sig, ainfo, sig->params [i],
+                       FALSE, pgr, pfr, &stack_size);
         break;
        case MONO_TYPE_TYPEDBYREF:
         stack_size += sizeof (MonoTypedRef);
@@ -4340,13 +4645,13 @@ get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
         break;
        case MONO_TYPE_U8:
        case MONO_TYPE_I8:
-        add_general (&gr, &stack_size, ainfo);
+        add_general (pgr, &stack_size, ainfo);
         break;
        case MONO_TYPE_R4:
-        add_float (&fr, &stack_size, ainfo, FALSE);
+        add_float (pfr, &stack_size, ainfo, FALSE);
         break;
        case MONO_TYPE_R8:
-        add_float (&fr, &stack_size, ainfo, TRUE);
+        add_float (pfr, &stack_size, ainfo, TRUE);
         break;
        default:
         g_assert_not_reached ();
@@ -4361,7 +4666,7 @@ get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
                
        /* Emit the signature cookie just before the implicit arguments
        */
-       add_general (&gr, &stack_size, &cinfo->sig_cookie);
+       add_general (pgr, &stack_size, &cinfo->sig_cookie);
      }
    
    cinfo->stack_usage = stack_size;
@@ -4489,7 +4794,7 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb,
    n = sig->param_count + sig->hasthis;
 
    // Collect info about method we age going to call
-   cinfo = get_call_info (sig, sig->pinvoke);
+   cinfo = get_call_info (cfg->generic_sharing_context, sig, sig->pinvoke);
 
    CFG_DEBUG(3) g_print("ALPHA: Will call %s method with %d(%d) params. RetType: %s(0x%X)\n",
                        sig->pinvoke ? "PInvoke" : "Managed",
@@ -4570,7 +4875,7 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb,
         if ((i >= sig->hasthis) &&
             (MONO_TYPE_ISSTRUCT(arg_type)))
           {
-            gint align;
+            guint align;
             guint32 size;
             
             if (arg_type->type == MONO_TYPE_TYPEDBYREF) {
@@ -4582,29 +4887,128 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb,
                 size = mono_type_native_stack_size (&in->klass->byval_arg,
                                                     &align);
               else
-                size = mono_type_stack_size (&in->klass->byval_arg, &align);
+                size = mini_type_stack_size (cfg->generic_sharing_context, &in->klass->byval_arg, &align);
 
-            {
-              MonoInst *stack_addr;
+            if (ainfo->storage == ArgAggregate)
+              {
+                MonoInst *vtaddr, *load, *load2, *offset_ins, *set_reg;
+                int slot, j;
+
+                 CFG_DEBUG(3) g_print("aggregate value type, size:%d\n", size);
+
+                vtaddr = mono_compile_create_var (cfg,
+                             &mono_defaults.int_class->byval_arg, OP_LOCAL);
+
+                /*
+                 * Part of the structure is passed in registers.
+                 */
+                for (j = 0; j < ainfo->nregs; ++j)
+                  {
+                    int offset, load_op, dest_reg, arg_storage;
+
+                    slot = ainfo->reg + j;
+                    load_op = CEE_LDIND_I;
+                    offset = j * 8;
+                    dest_reg = ainfo->reg + j;
+                    arg_storage = ArgInIReg;
+                    
+                    MONO_INST_NEW (cfg, load, CEE_LDIND_I);
+                    load->ssa_op = MONO_SSA_LOAD;
+                    load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];
+
+                    NEW_ICONST (cfg, offset_ins, offset);
+                    MONO_INST_NEW (cfg, load2, CEE_ADD);
+                    load2->inst_left = load;
+                    load2->inst_right = offset_ins;
+
+                    MONO_INST_NEW (cfg, load, load_op);
+                    load->inst_left = load2;
+
+                    if (j == 0)
+                      set_reg = arg;
+                    else
+                      MONO_INST_NEW (cfg, set_reg, OP_OUTARG_REG);
+
+                    add_outarg_reg (cfg, call, set_reg, arg_storage,
+                                    dest_reg, load);
+                    if (set_reg != call->out_args)
+                      {
+                        set_reg->next = call->out_args;
+                        call->out_args = set_reg;
+                    }
+                  }
+
+                /*
+                 * Part of the structure is passed on the stack.
+                 */
+                for (j = ainfo->nregs; j < ainfo->nslots; ++j)
+                  {
+                    MonoInst *outarg;
+
+                    slot = ainfo->reg + j;
+
+                    MONO_INST_NEW (cfg, load, CEE_LDIND_I);
+                    load->ssa_op = MONO_SSA_LOAD;
+                    load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];
+
+                    NEW_ICONST (cfg, offset_ins, (j * sizeof (gpointer)));
+                    MONO_INST_NEW (cfg, load2, CEE_ADD);
+                    load2->inst_left = load;
+                    load2->inst_right = offset_ins;
+
+                    MONO_INST_NEW (cfg, load, CEE_LDIND_I);
+                    load->inst_left = load2;
+
+                    if (j == 0)
+                      outarg = arg;
+                    else
+                      MONO_INST_NEW (cfg, outarg, OP_OUTARG);
+                    
+                    outarg->inst_left = load;
+                    //outarg->inst_imm = 16 + ainfo->offset + (slot - 8) * 8;
+                    outarg->dreg = ainfo->offset + (slot - 22) * 8;
+
+                    if (outarg != call->out_args)
+                      {
+                        outarg->next = call->out_args;
+                        call->out_args = outarg;
+                      }
+                  }
+               
+                /* Trees can't be shared so make a copy*/
+                MONO_INST_NEW (cfg, arg, CEE_STIND_I);
+                arg->cil_code = in->cil_code;
+                arg->ssa_op = MONO_SSA_STORE;
+                arg->inst_left = vtaddr;
+                arg->inst_right = in;
+                arg->type = in->type;
+
+                /* prepend, so they get reversed */
+                arg->next = call->out_args;
+                call->out_args = arg;
+              }
+            else
+              {
+                MonoInst *stack_addr;
 
-              CFG_DEBUG(3) g_print("value type, size:%d\n", size);
+                CFG_DEBUG(3) g_print("value type, size:%d\n", size);
 
-              MONO_INST_NEW (cfg, stack_addr, OP_REGOFFSET);
-              stack_addr->inst_basereg = alpha_sp;
-              //stack_addr->inst_offset = -(cinfo->stack_usage - ainfo->offset);
-              stack_addr->inst_offset = ainfo->offset;
-              //stack_addr->inst_offset = 16 + ainfo->offset;
-              stack_addr->inst_imm = size;
+                MONO_INST_NEW (cfg, stack_addr, OP_REGOFFSET);
+                stack_addr->inst_basereg = alpha_sp;
+                //stack_addr->inst_offset = -(cinfo->stack_usage - ainfo->offset);
+                stack_addr->inst_offset = ainfo->offset;
+                //stack_addr->inst_offset = 16 + ainfo->offset;
+                stack_addr->inst_imm = size;
 
-              arg->opcode = OP_OUTARG_VT;
-              arg->inst_right = stack_addr;
-            }
+                arg->opcode = OP_OUTARG_VT;
+                arg->inst_right = stack_addr;
+              }
 
             /*
-            arg->opcode = OP_OUTARG_VT;
-            arg->klass = in->klass;
-            arg->unused = sig->pinvoke;
-            arg->inst_imm = size; */
+              arg->opcode = OP_OUTARG_VT;
+              arg->klass = in->klass;
+              arg->backend.is_pinvoke = sig->pinvoke;
+              arg->inst_imm = size; */
           }
         else
           {
@@ -4630,8 +5034,8 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb,
                       arg->opcode = OP_OUTARG_R8;
                 }
                 break;
-               case ArgInFloatSSEReg:
-               case ArgInDoubleSSEReg:
+               case ArgInFloatReg:
+               case ArgInDoubleReg:
                  add_outarg_reg (cfg, call, arg, ainfo->storage, ainfo->reg, in);
                break;
               default:
@@ -4641,35 +5045,36 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb,
        }
      }
 
-   if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
-     if (cinfo->ret.storage == ArgValuetypeInReg) {
-       MonoInst *zero_inst;
-       /*
-       * After the call, the struct is in registers, but needs to be saved to the
-       memory pointed
-       * to by vt_arg in this_vret_args. This means that vt_ar
-       g needs to be saved somewhere
-       * before calling the function. So we add a dummy instru
-       ction to represent pushing the
-       * struct return address to the stack. The return addres
-       s will be saved to this stack slot
-       * by the code emitted in this_vret_args.
-       */
-       MONO_INST_NEW (cfg, arg, OP_OUTARG);
-       MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
-       zero_inst->inst_p0 = 0;
-       arg->inst_left = zero_inst;
-       arg->type = STACK_PTR;
-       /* prepend, so they get reversed */
-       arg->next = call->out_args;
-       call->out_args = arg;
+   if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
+     {
+       if (cinfo->ret.storage == ArgValuetypeInReg) {
+        MonoInst *zero_inst;
+        /*
+         * After the call, the struct is in registers, but needs to be
+         * saved to the memory pointed to by vt_arg in this_vret_args.
+         * This means that vt_arg needs to be saved somewhere before
+         * calling the function.
+         *
+         * So we add a dummy instruction to represent pushing the
+         * struct return address to the stack. The return address will
+         * be saved to this stack slot by the code emitted in
+         * this_vret_args.
+         */
+        MONO_INST_NEW (cfg, arg, OP_OUTARG);
+        MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
+        zero_inst->inst_p0 = 0;
+        arg->inst_left = zero_inst;
+        arg->type = STACK_PTR;
+        /* prepend, so they get reversed */
+        arg->next = call->out_args;
+        call->out_args = arg;
+       }
+       else
+        /* if the function returns a struct, the called method
+           already does a ret $0x4 */
+        if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
+          ; //cinfo->stack_usage -= 4;
      }
-     else
-       /* if the function returns a struct, the called method a
-         lready does a ret $0x4 */
-       if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
-        ; //cinfo->stack_usage -= 4;
-   }
    
    // stack_usage shows how much stack we would need to do the call
    // (for example for params that we pass on stack
@@ -4733,9 +5138,9 @@ mono_arch_get_global_int_regs (MonoCompile *cfg)
    
    CFG_DEBUG(2) ALPHA_DEBUG("mono_arch_get_global_int_regs");
    
-   regs = g_list_prepend (regs, (gpointer)alpha_r9);
-   regs = g_list_prepend (regs, (gpointer)alpha_r10);
-   regs = g_list_prepend (regs, (gpointer)alpha_r11);
+//   regs = g_list_prepend (regs, (gpointer)alpha_r9);
+//   regs = g_list_prepend (regs, (gpointer)alpha_r10);
+//   regs = g_list_prepend (regs, (gpointer)alpha_r11);
    regs = g_list_prepend (regs, (gpointer)alpha_r12);
    regs = g_list_prepend (regs, (gpointer)alpha_r13);
    regs = g_list_prepend (regs, (gpointer)alpha_r14);
@@ -4745,42 +5150,6 @@ mono_arch_get_global_int_regs (MonoCompile *cfg)
 
 /*========================= End of Function ========================*/
 
-static gboolean
-is_regsize_var (MonoType *t)
-{
-  if (t->byref)
-    return TRUE;
-
-  t = mono_type_get_underlying_type (t);
-  switch (t->type) {
-  case MONO_TYPE_I1:
-  case MONO_TYPE_U1:
-  case MONO_TYPE_I2:
-  case MONO_TYPE_U2:
-  case MONO_TYPE_I4:
-  case MONO_TYPE_U4:
-  case MONO_TYPE_I:
-  case MONO_TYPE_U:
-  case MONO_TYPE_PTR:
-  case MONO_TYPE_FNPTR:
-  case MONO_TYPE_BOOLEAN:
-    return TRUE;
-  case MONO_TYPE_OBJECT:
-  case MONO_TYPE_STRING:
-  case MONO_TYPE_CLASS:
-  case MONO_TYPE_SZARRAY:
-  case MONO_TYPE_ARRAY:
-    return TRUE;
-  case MONO_TYPE_VALUETYPE:
-    return FALSE;
-  }
-
-  return FALSE;
-}
-
-
-
-
 /*------------------------------------------------------------------*/
 /*                                                                  */
 /* Name         - mono_arch_get_allocatable_int_vars                */
@@ -4804,11 +5173,11 @@ mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
 
    sig = mono_method_signature (cfg->method);
 
-   cinfo = get_call_info (sig, FALSE);
+   cinfo = get_call_info (cfg->generic_sharing_context, sig, FALSE);
 
    for (i = 0; i < sig->param_count + sig->hasthis; ++i)
      {
-       MonoInst *ins = cfg->varinfo [i];
+       MonoInst *ins = cfg->args [i];
 
        ArgInfo *ainfo = &cinfo->args [i];
 
@@ -4837,7 +5206,7 @@ mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
           (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
         continue;
 
-       if (is_regsize_var (ins->inst_vtype))
+       if (mono_is_regsize_var (ins->inst_vtype))
         {
           g_assert (MONO_VARINFO (cfg, i)->reg == -1);
           g_assert (i == vmv->idx);
@@ -4986,7 +5355,7 @@ mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p,
       /* Allocate a new area on the stack and save arguments there */
       sig = mono_method_signature (cfg->method);
 
-      cinfo = get_call_info (sig, FALSE);
+      cinfo = get_call_info (cfg->generic_sharing_context, sig, FALSE);
 
       n = sig->param_count + sig->hasthis;
 
@@ -4998,16 +5367,16 @@ mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p,
       
       for (i = 0; i < n; ++i)
        {
-         inst = cfg->varinfo [i];
+         inst = cfg->args [i];
 
          if (inst->opcode == OP_REGVAR)
            {
              switch(cinfo->args[i].storage)
                {
-               case ArgInDoubleSSEReg:
+               case ArgInDoubleReg:
                  alpha_stt(code, inst->dreg, alpha_sp, (i*8));
                   break;
-               case ArgInFloatSSEReg:
+               case ArgInFloatReg:
                  alpha_sts(code, inst->dreg, alpha_sp, (i*8));
                  break;
                default:
@@ -5235,7 +5604,7 @@ mono_arch_allocate_vars (MonoCompile *cfg)
    
    sig = mono_method_signature (cfg->method);
    
-   cinfo = get_call_info (sig, FALSE);
+   cinfo = get_call_info (cfg->generic_sharing_context, sig, FALSE);
    
    /* if (cfg->arch.omit_fp) {
       cfg->flags |= MONO_CFG_HAS_SPILLUP;
@@ -5280,8 +5649,8 @@ mono_arch_allocate_vars (MonoCompile *cfg)
        switch (cinfo->ret.storage)
         {
         case ArgInIReg:
-        case ArgInFloatSSEReg:
-        case ArgInDoubleSSEReg:
+        case ArgInFloatReg:
+        case ArgInDoubleReg:
           if ((MONO_TYPE_ISSTRUCT (sig->ret) &&
                !mono_class_from_mono_type (sig->ret)->enumtype) ||
               (sig->ret->type == MONO_TYPE_TYPEDBYREF))
@@ -5362,8 +5731,6 @@ mono_arch_allocate_vars (MonoCompile *cfg)
        }
      }
    
-   g_free (offsets);
-
    // TODO check how offsets[i] are calculated
    // it seems they are points to the end on data. Like 8, but it actually - 0
 
@@ -5393,12 +5760,24 @@ mono_arch_allocate_vars (MonoCompile *cfg)
 
    CFG_DEBUG(3) g_print ("ALPHA: args_save_area_offset at %d(%x)\n", offset, offset);
 
-   for (i = 0; i < PARAM_REGS; ++i)
-     if (i < (sig->param_count + sig->hasthis))
-        //(cfg->used_int_regs & (1 << param_regs[i])))
-       {
-        offset += sizeof (gpointer);
-       }
+   for (i = 0; i < sig->param_count + sig->hasthis; ++i)
+     {
+       ArgInfo *ainfo = &cinfo->args [i];
+
+       switch(ainfo->storage)
+        {
+        case ArgInIReg:
+        case ArgInFloatReg:
+        case ArgInDoubleReg:
+          offset += sizeof (gpointer);
+          break;
+        case ArgAggregate:
+          offset += ainfo->nregs * sizeof (gpointer);
+          break;
+        default:
+          ;
+        }
+     }
 
    CFG_DEBUG(3) g_print ("ALPHA: Stack size is %d(%x)\n",
                          offset, offset);
@@ -5406,7 +5785,7 @@ mono_arch_allocate_vars (MonoCompile *cfg)
    // Reserve space for method params
    for (i = 0; i < sig->param_count + sig->hasthis; ++i)
      {
-       inst = cfg->varinfo [i];
+       inst = cfg->args [i];
 
        if (inst->opcode != OP_REGVAR)
         {
@@ -5436,8 +5815,8 @@ mono_arch_allocate_vars (MonoCompile *cfg)
             inreg = FALSE;
                 
           if (//(ainfo->storage == ArgInIReg) ||
-              (ainfo->storage == ArgInFloatSSEReg) ||
-              (ainfo->storage == ArgInDoubleSSEReg) ||
+              (ainfo->storage == ArgInFloatReg) ||
+              (ainfo->storage == ArgInDoubleReg) ||
               (ainfo->storage == ArgValuetypeInReg))
             inreg = FALSE;
                 
@@ -5446,8 +5825,8 @@ mono_arch_allocate_vars (MonoCompile *cfg)
           switch (ainfo->storage)
             {
             case ArgInIReg:
-            case ArgInFloatSSEReg:
-            case ArgInDoubleSSEReg:
+            case ArgInFloatReg:
+            case ArgInDoubleReg:
               inst->opcode = OP_REGVAR;
               inst->dreg = ainfo->reg;
               break;
@@ -5463,6 +5842,10 @@ mono_arch_allocate_vars (MonoCompile *cfg)
               break;
             case ArgValuetypeInReg:
               break;
+            case ArgAggregate:
+              inreg = FALSE;
+              break;
+
             default:
               NOT_IMPLEMENTED("");
             }
@@ -5483,8 +5866,14 @@ mono_arch_allocate_vars (MonoCompile *cfg)
                 // 2 * sizeof (gpointer) : sizeof (gpointer);
 
                 inst->inst_offset = cfg->arch.args_save_area_offset + a_off;
-               a_off += 8;
-
+                switch(ainfo->storage)
+                  {
+                  case ArgAggregate:
+                    a_off += ainfo->nslots * 8;
+                    break;
+                  default:
+                    a_off += sizeof (gpointer);
+                  }
                //   (/*(ainfo->reg - 16)*/ i * 8);
               }
             }
@@ -5549,19 +5938,20 @@ mono_arch_get_vcall_slot_addr (guint8* code, gpointer *regs)
          pc, regs);
 
   // Check if we have parameters on stack
-  if (pc[-2] & 0xFFFF0000 == 0x23DE0000)     // lda     sp,-n(sp)
+  if ((pc[-2] & 0xFFFF0000) == 0x23DE0000)     // lda     sp,-n(sp)
     start_index = -3;
 
   // Check for (call_membase):
   // -4: mov     v0,a0        - load this ???
-  // -3: ldq     v0,0(v0)     - load vtable
+  // -3: ldq     v0,0(v0)     - load vtable 
   // -2: ldq     t12,64(v0)   - load method (object->vtable->vtable[method->slot])
-  if ((pc[start_index-1] & 0xFFFFFFFF) == 0xA4000000 &&
+  if ((pc[start_index-1] & 0xFC00FFFF) == 0xA4000000 &&
       (pc[start_index] & 0xFFFF0000) == 0xA7600000
       )
     {
       disp = pc[start_index] & 0xFFFF;
-      reg = 0; // For now
+      reg = (pc[start_index-1] >> AXP_REG1_SHIFT) & AXP_REG_MASK;
+      //reg = 0; // For now
 
       ALPHA_PRINT g_debug("ALPHA_CHECK: [mono_arch_get_vcall_slot_addr callvirt] call_membase");
 
@@ -5573,7 +5963,7 @@ mono_arch_get_vcall_slot_addr (guint8* code, gpointer *regs)
   // -4: ldq     v0,0(v0)
   // -3: ldq     v0,-n(v0)
   // -2: ldq     t12,0(v0)
-  if ((pc[start_index-2] & 0xFFFFFFFF) == 0xA4000000 &&
+  if ((pc[start_index-2] & 0xFC00FFFF) == 0xA4000000 &&
       (pc[start_index-1] & 0xFFFF0000) == 0xA4000000 &&
       (pc[start_index] & 0xFFFF0000) == 0xA7600000
       )