Add support for OP_SETFRET to the llvm backend.
[mono.git] / mono / mini / mini-llvm.c
index bab085fb25237af5fe2bdd50cb910f94da9b9541..c459b388e1b0a424e0c605266d8d8cae7cb1acf2 100644 (file)
@@ -624,6 +624,8 @@ simd_op_to_intrins (int opcode)
                return "llvm.x86.sse41.pminuw";
        case OP_PMINB_UN:
                return "llvm.x86.sse2.pminu.b";
+       case OP_PMINW:
+               return "llvm.x86.sse2.pmins.w";
        case OP_MAXPD:
                return "llvm.x86.sse2.max.pd";
        case OP_MAXPS:
@@ -634,6 +636,16 @@ simd_op_to_intrins (int opcode)
                return "llvm.x86.sse41.pmaxuw";
        case OP_PMAXB_UN:
                return "llvm.x86.sse2.pmaxu.b";
+       case OP_PCMPEQB:
+               return "llvm.x86.sse2.pcmpeq.b";
+       case OP_PCMPEQW:
+               return "llvm.x86.sse2.pcmpeq.w";
+       case OP_PCMPEQD:
+               return "llvm.x86.sse2.pcmpeq.d";
+       case OP_PCMPEQQ:
+               return "llvm.x86.sse41.pcmpeqq";
+       case OP_PCMPGTB:
+               return "llvm.x86.sse2.pcmpgt.b";
 #endif
        default:
                g_assert_not_reached ();
@@ -641,6 +653,39 @@ simd_op_to_intrins (int opcode)
        }
 }
 
+static LLVMTypeRef /* returns the LLVM vector type used by a SIMD extract/expand opcode */
+simd_op_to_llvm_type (int opcode) /* opcode: one of the OP_EXTRACT_xx / OP_EXPAND_xx opcodes listed below */
+{
+#if defined(TARGET_X86) || defined(TARGET_AMD64)
+       switch (opcode) {
+       case OP_EXTRACT_R8:
+       case OP_EXPAND_R8:
+               return LLVMVectorType (LLVMDoubleType (), 2); /* 2 x double */
+       case OP_EXTRACT_I8:
+       case OP_EXPAND_I8:
+               return LLVMVectorType (LLVMInt64Type (), 2); /* 2 x i64 */
+       case OP_EXTRACT_I4:
+       case OP_EXPAND_I4:
+               return LLVMVectorType (LLVMInt32Type (), 4); /* 4 x i32 */
+       case OP_EXTRACT_I2:
+       case OP_EXTRACT_U2: /* the I2 and U2 extract opcodes map to the same vector type */
+       case OP_EXPAND_I2:
+               return LLVMVectorType (LLVMInt16Type (), 8); /* 8 x i16 */
+       case OP_EXTRACT_I1:
+       case OP_EXTRACT_U1: /* the I1 and U1 extract opcodes map to the same vector type */
+       case OP_EXPAND_I1:
+               return LLVMVectorType (LLVMInt8Type (), 16); /* 16 x i8 */
+       case OP_EXPAND_R4: /* only the expand opcode has an R4 case here */
+               return LLVMVectorType (LLVMFloatType (), 4); /* 4 x float */
+       default:
+               g_assert_not_reached (); /* any opcode not listed above is rejected */
+               return NULL;
+       }
+#else
+       return NULL; /* targets other than x86/amd64: no vector type is returned */
+#endif
+}
+
 /*
  * get_bb:
  *
@@ -784,7 +829,7 @@ emit_volatile_load (EmitContext *ctx, int vreg)
                 * Might have to zero extend since llvm doesn't have 
                 * unsigned types.
                 */
-               if (t->type == MONO_TYPE_U1 || t->type == MONO_TYPE_U2)
+               if (t->type == MONO_TYPE_U1 || t->type == MONO_TYPE_U2 || t->type == MONO_TYPE_CHAR || t->type == MONO_TYPE_BOOLEAN)
                        v = LLVMBuildZExt (ctx->builder, v, LLVMInt32Type (), "");
                else if (t->type == MONO_TYPE_U8)
                        v = LLVMBuildZExt (ctx->builder, v, LLVMInt64Type (), "");
@@ -867,7 +912,7 @@ sig_to_llvm_sig_full (EmitContext *ctx, MonoMethodSignature *sig, LLVMCallInfo *
                param_types [pindex] = IntPtrType ();
                pindex ++;
        }
-       if (cinfo && cinfo->imt_arg) {
+       if (cinfo && cinfo->imt_arg && IS_LLVM_MONO_BRANCH) {
                if (sinfo)
                        sinfo->imt_arg_pindex = pindex;
                param_types [pindex] = IntPtrType ();
@@ -1038,6 +1083,10 @@ get_plt_entry (EmitContext *ctx, LLVMTypeRef llvm_sig, MonoJumpInfoType type, gc
        if (!callee_name)
                return NULL;
 
+       if (ctx->cfg->compile_aot)
+               /* Add a patch so referenced wrappers can be compiled in full aot mode */
+               mono_add_patch_info (ctx->cfg, 0, type, data);
+
        // FIXME: Locking
        callee = g_hash_table_lookup (ctx->lmodule->plt_entries, callee_name);
        if (!callee) {
@@ -1073,6 +1122,22 @@ get_handler_clause (MonoCompile *cfg, MonoBasicBlock *bb)
        return -1;
 }
 
+static void /* attaches a metadata node of the named kind to an LLVM value */
+set_metadata_flag (LLVMValueRef v, const char *flag_name) /* v: value to tag; flag_name: metadata kind name, e.g. "mono.this" */
+{
+#if LLVM_CHECK_VERSION (2, 8) /* when this check fails the function body is empty */
+       LLVMValueRef md_arg;
+       int md_kind;
+       
+       if (!IS_LLVM_MONO_BRANCH)
+               return; /* nothing is attached unless IS_LLVM_MONO_BRANCH is true */
+
+       md_kind = LLVMGetMDKindID (flag_name, strlen (flag_name)); /* kind id looked up from the flag name */
+       md_arg = LLVMMDString ("mono", 4); /* the node operand is always the 4-character string "mono" */
+       LLVMSetMetadata (v, md_kind, LLVMMDNode (&md_arg, 1)); /* one-operand node attached to v under md_kind */
+#endif
+}
+
 /*
  * emit_call:
  *
@@ -1135,12 +1200,12 @@ emit_load (EmitContext *ctx, MonoBasicBlock *bb, LLVMBuilderRef *builder_ref, in
        LLVMValueRef args [16], res;
        LLVMTypeRef addr_type;
 
-       if (is_faulting && IS_LLVM_MONO_BRANCH) {
+       if (is_faulting && bb->region != -1 && IS_LLVM_MONO_BRANCH) {
                /*
                 * We handle loads which can fault by calling a mono specific intrinsic
                 * using an invoke, so they are handled properly inside try blocks.
-                * We use this even outside clauses, since LLVM might be able to hoist them
-                * out of loops.
+                * We can't use this outside clauses, since LLVM optimizes intrinsics which
+                * are marked with IntrReadArgMem.
                 */
                switch (size) {
                case 1:
@@ -1175,12 +1240,22 @@ emit_load (EmitContext *ctx, MonoBasicBlock *bb, LLVMBuilderRef *builder_ref, in
                
                return res;
        } else {
+               LLVMValueRef res;
+
                /* 
                 * We emit volatile loads for loads which can fault, because otherwise
                 * LLVM will generate invalid code when encountering a load from a
                 * NULL address.
                 */
-               return mono_llvm_build_load (*builder_ref, addr, name, is_faulting);
+                res = mono_llvm_build_load (*builder_ref, addr, name, is_faulting);
+
+                /* Mark it with a custom metadata */
+                /*
+                if (is_faulting)
+                        set_metadata_flag (res, "mono.faulting.load");
+                */
+
+                return res;
        }
 }
 
@@ -1190,7 +1265,7 @@ emit_store (EmitContext *ctx, MonoBasicBlock *bb, LLVMBuilderRef *builder_ref, i
        const char *intrins_name;
        LLVMValueRef args [16];
 
-       if (is_faulting && IS_LLVM_MONO_BRANCH) {
+       if (is_faulting && bb->region != -1 && IS_LLVM_MONO_BRANCH) {
                switch (size) {
                case 1:
                        intrins_name = "llvm.mono.store.i8.p0i8";
@@ -1540,9 +1615,7 @@ emit_entry_bb (EmitContext *ctx, LLVMBuilderRef builder)
                        emit_volatile_store (ctx, cfg->args [i + sig->hasthis]->dreg);
 
        if (sig->hasthis && !cfg->rgctx_var && cfg->generic_sharing_context) {
-#if LLVM_CHECK_VERSION (2, 8)
-               LLVMValueRef this_alloc, md_arg;
-               int md_kind;
+               LLVMValueRef this_alloc;
 
                /*
                 * The exception handling code needs the location where the this argument was
@@ -1554,16 +1627,11 @@ emit_entry_bb (EmitContext *ctx, LLVMBuilderRef builder)
                /* This volatile store will keep the alloca alive */
                mono_llvm_build_store (builder, ctx->values [cfg->args [0]->dreg], this_alloc, TRUE);
 
-               md_kind = LLVMGetMDKindID ("mono.this", strlen ("mono.this"));
-               md_arg = LLVMMDString ("this", 4);
-               LLVMSetMetadata (this_alloc, md_kind, LLVMMDNode (&md_arg, 1));
-#endif
+               set_metadata_flag (this_alloc, "mono.this");
        }
 
        if (cfg->rgctx_var) {
-#if LLVM_CHECK_VERSION (2, 8)
-               LLVMValueRef rgctx_alloc, store, md_arg;
-               int md_kind;
+               LLVMValueRef rgctx_alloc, store;
 
                /*
                 * We handle the rgctx arg similarly to the this pointer.
@@ -1573,10 +1641,7 @@ emit_entry_bb (EmitContext *ctx, LLVMBuilderRef builder)
                /* This volatile store will keep the alloca alive */
                store = mono_llvm_build_store (builder, ctx->rgctx_arg, rgctx_alloc, TRUE);
 
-               md_kind = LLVMGetMDKindID ("mono.this", strlen ("mono.this"));
-               md_arg = LLVMMDString ("this", 4);
-               LLVMSetMetadata (rgctx_alloc, md_kind, LLVMMDNode (&md_arg, 1));
-#endif
+               set_metadata_flag (rgctx_alloc, "mono.this");
        }
 
        /*
@@ -1634,7 +1699,7 @@ process_call (EmitContext *ctx, MonoBasicBlock *bb, LLVMBuilderRef *builder_ref,
        LLVMValueRef *args;
        LLVMCallInfo *cinfo;
        GSList *l;
-       int i;
+       int i, len;
        gboolean vretaddr;
        LLVMTypeRef llvm_sig;
        gpointer target;
@@ -1757,10 +1822,6 @@ process_call (EmitContext *ctx, MonoBasicBlock *bb, LLVMBuilderRef *builder_ref,
                g_assert (ins->inst_offset % size == 0);
                index = LLVMConstInt (LLVMInt32Type (), ins->inst_offset / size, FALSE);
 
-               // FIXME: mono_arch_get_vcall_slot () can't decode the code
-               // generated by LLVM
-               //LLVM_FAILURE (ctx, "virtual call");
-
                /*
                 * When using the llvm mono branch, we can support IMT directly, otherwise
                 * we need to call a trampoline.
@@ -1795,7 +1856,9 @@ process_call (EmitContext *ctx, MonoBasicBlock *bb, LLVMBuilderRef *builder_ref,
        /* 
         * Collect and convert arguments
         */
-       args = alloca (sizeof (LLVMValueRef) * ((sig->param_count * 2) + sig->hasthis + vretaddr + call->rgctx_reg));
+       len = sizeof (LLVMValueRef) * ((sig->param_count * 2) + sig->hasthis + vretaddr + call->rgctx_reg);
+       args = alloca (len);
+       memset (args, 0, len);
        l = call->out_ireg_args;
 
        if (IS_LLVM_MONO_BRANCH) {
@@ -1878,7 +1941,7 @@ process_call (EmitContext *ctx, MonoBasicBlock *bb, LLVMBuilderRef *builder_ref,
 #endif
        /* The two can't be used together, so use only one LLVM calling conv to pass them */
        g_assert (!(call->rgctx_arg_reg && call->imt_arg_reg));
-       if (call->rgctx_arg_reg || call->imt_arg_reg)
+       if (!sig->pinvoke)
                LLVMSetInstructionCallConv (lcall, LLVMMono1CallConv);
 
        if (call->rgctx_arg_reg)
@@ -2009,8 +2072,12 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
                         */
                        //LLVM_FAILURE (ctx, "aot+clauses");
                } else {
-                       /* exception_cb will decode this */
-                       ti = g_malloc (sizeof (gint32));
+                       /*
+                        * After the cfg mempool is freed, the type info will point to stale memory,
+                        * but this is not a problem, since we decode it once in exception_cb during
+                        * compilation.
+                        */
+                       ti = mono_mempool_alloc (cfg->mempool, sizeof (gint32));
                        *(gint32*)ti = clause_index;
 
                        type_info = LLVMAddGlobal (module, i8ptr, ti_name);
@@ -2196,126 +2263,83 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
                case OP_COMPARE:
                case OP_ICOMPARE_IMM:
                case OP_LCOMPARE_IMM:
-               case OP_COMPARE_IMM:
-#ifdef TARGET_AMD64
-               case OP_AMD64_ICOMPARE_MEMBASE_REG:
-               case OP_AMD64_ICOMPARE_MEMBASE_IMM:
-#endif
-#ifdef TARGET_X86
-               case OP_X86_COMPARE_MEMBASE_REG:
-               case OP_X86_COMPARE_MEMBASE_IMM:
-#endif
-                       {
-                               CompRelation rel;
-                               LLVMValueRef cmp;
-
-                               if (ins->next->opcode == OP_NOP)
-                                       break;
-
-                               if (ins->next->opcode == OP_BR)
-                                       /* The comparison result is not needed */
-                                       continue;
-
-                               rel = mono_opcode_to_cond (ins->next->opcode);
-
-                               /* Used for implementing bound checks */
-#ifdef TARGET_AMD64
-                               if ((ins->opcode == OP_AMD64_ICOMPARE_MEMBASE_REG) || (ins->opcode == OP_AMD64_ICOMPARE_MEMBASE_IMM)) {
-                                       int size = 4;
-                                       LLVMValueRef index;
-                                       LLVMTypeRef t;
-
-                                       t = LLVMInt32Type ();
-
-                                       g_assert (ins->inst_offset % size == 0);
-                                       index = LLVMConstInt (LLVMInt32Type (), ins->inst_offset / size, FALSE);                                
-
-                                       lhs = LLVMBuildLoad (builder, LLVMBuildGEP (builder, convert (ctx, values [ins->inst_basereg], LLVMPointerType (t, 0)), &index, 1, ""), "");
-                               }
-                               if (ins->opcode == OP_AMD64_ICOMPARE_MEMBASE_IMM) {
-                                       lhs = convert (ctx, lhs, LLVMInt32Type ());
-                                       rhs = LLVMConstInt (LLVMInt32Type (), ins->inst_imm, FALSE);
-                               }
-                               if (ins->opcode == OP_AMD64_ICOMPARE_MEMBASE_REG)
-                                       rhs = convert (ctx, rhs, LLVMInt32Type ());
-#endif
-
-#ifdef TARGET_X86
-                               if ((ins->opcode == OP_X86_COMPARE_MEMBASE_REG) || (ins->opcode == OP_X86_COMPARE_MEMBASE_IMM)) {
-                                       int size = 4;
-                                       LLVMValueRef index;
-                                       LLVMTypeRef t;
+               case OP_COMPARE_IMM: {
+                       CompRelation rel;
+                       LLVMValueRef cmp;
 
-                                       t = LLVMInt32Type ();
+                       if (ins->next->opcode == OP_NOP)
+                               break;
 
-                                       g_assert (ins->inst_offset % size == 0);
-                                       index = LLVMConstInt (LLVMInt32Type (), ins->inst_offset / size, FALSE);                                
+                       if (ins->next->opcode == OP_BR)
+                               /* The comparison result is not needed */
+                               continue;
 
-                                       lhs = LLVMBuildLoad (builder, LLVMBuildGEP (builder, convert (ctx, values [ins->inst_basereg], LLVMPointerType (t, 0)), &index, 1, ""), "");
-                               }
-                               if (ins->opcode == OP_X86_COMPARE_MEMBASE_IMM) {
-                                       lhs = convert (ctx, lhs, LLVMInt32Type ());
-                                       rhs = LLVMConstInt (LLVMInt32Type (), ins->inst_imm, FALSE);
-                               }
-                               if (ins->opcode == OP_X86_COMPARE_MEMBASE_REG)
-                                       rhs = convert (ctx, rhs, LLVMInt32Type ());
-#endif
+                       rel = mono_opcode_to_cond (ins->next->opcode);
 
-                               if (ins->opcode == OP_ICOMPARE_IMM) {
-                                       lhs = convert (ctx, lhs, LLVMInt32Type ());
-                                       rhs = LLVMConstInt (LLVMInt32Type (), ins->inst_imm, FALSE);
-                               }
-                               if (ins->opcode == OP_LCOMPARE_IMM) {
-                                       lhs = convert (ctx, lhs, LLVMInt64Type ());
-                                       rhs = LLVMConstInt (LLVMInt64Type (), GET_LONG_IMM (ins), FALSE);
-                               }
-                               if (ins->opcode == OP_LCOMPARE) {
-                                       lhs = convert (ctx, lhs, LLVMInt64Type ());
-                                       rhs = convert (ctx, rhs, LLVMInt64Type ());
-                               }
-                               if (ins->opcode == OP_ICOMPARE) {
-                                       lhs = convert (ctx, lhs, LLVMInt32Type ());
-                                       rhs = convert (ctx, rhs, LLVMInt32Type ());
-                               }
+                       if (ins->opcode == OP_ICOMPARE_IMM) {
+                               lhs = convert (ctx, lhs, LLVMInt32Type ());
+                               rhs = LLVMConstInt (LLVMInt32Type (), ins->inst_imm, FALSE);
+                       }
+                       if (ins->opcode == OP_LCOMPARE_IMM) {
+                               lhs = convert (ctx, lhs, LLVMInt64Type ());
+                               rhs = LLVMConstInt (LLVMInt64Type (), GET_LONG_IMM (ins), FALSE);
+                       }
+                       if (ins->opcode == OP_LCOMPARE) {
+                               lhs = convert (ctx, lhs, LLVMInt64Type ());
+                               rhs = convert (ctx, rhs, LLVMInt64Type ());
+                       }
+                       if (ins->opcode == OP_ICOMPARE) {
+                               lhs = convert (ctx, lhs, LLVMInt32Type ());
+                               rhs = convert (ctx, rhs, LLVMInt32Type ());
+                       }
 
-                               if (lhs && rhs) {
-                                       if (LLVMGetTypeKind (LLVMTypeOf (lhs)) == LLVMPointerTypeKind)
-                                               rhs = convert (ctx, rhs, LLVMTypeOf (lhs));
-                                       else if (LLVMGetTypeKind (LLVMTypeOf (rhs)) == LLVMPointerTypeKind)
-                                               lhs = convert (ctx, lhs, LLVMTypeOf (rhs));
-                               }
+                       if (lhs && rhs) {
+                               if (LLVMGetTypeKind (LLVMTypeOf (lhs)) == LLVMPointerTypeKind)
+                                       rhs = convert (ctx, rhs, LLVMTypeOf (lhs));
+                               else if (LLVMGetTypeKind (LLVMTypeOf (rhs)) == LLVMPointerTypeKind)
+                                       lhs = convert (ctx, lhs, LLVMTypeOf (rhs));
+                       }
 
-                               /* We use COMPARE+SETcc/Bcc, llvm uses SETcc+br cond */
-                               if (ins->opcode == OP_FCOMPARE)
-                                       cmp = LLVMBuildFCmp (builder, fpcond_to_llvm_cond [rel], convert (ctx, lhs, LLVMDoubleType ()), convert (ctx, rhs, LLVMDoubleType ()), "");
-                               else if (ins->opcode == OP_COMPARE_IMM)
-                                       cmp = LLVMBuildICmp (builder, cond_to_llvm_cond [rel], convert (ctx, lhs, IntPtrType ()), LLVMConstInt (IntPtrType (), ins->inst_imm, FALSE), "");
-                               else if (ins->opcode == OP_COMPARE)
-                                       cmp = LLVMBuildICmp (builder, cond_to_llvm_cond [rel], convert (ctx, lhs, IntPtrType ()), convert (ctx, rhs, IntPtrType ()), "");
-                               else
-                                       cmp = LLVMBuildICmp (builder, cond_to_llvm_cond [rel], lhs, rhs, "");
-
-                               if (MONO_IS_COND_BRANCH_OP (ins->next)) {
-                                       LLVMBuildCondBr (builder, cmp, get_bb (ctx, ins->next->inst_true_bb), get_bb (ctx, ins->next->inst_false_bb));
-                                       has_terminator = TRUE;
-                               } else if (MONO_IS_SETCC (ins->next)) {
-                                       sprintf (dname_buf, "t%d", ins->next->dreg);
-                                       dname = dname_buf;
-                                       values [ins->next->dreg] = LLVMBuildZExt (builder, cmp, LLVMInt32Type (), dname);
-
-                                       /* Add stores for volatile variables */
-                                       emit_volatile_store (ctx, ins->next->dreg);
-                               } else if (MONO_IS_COND_EXC (ins->next)) {
-                                       emit_cond_system_exception (ctx, bb, ins->next->inst_p1, cmp);
-                                       CHECK_FAILURE (ctx);
-                                       builder = ctx->builder;
+                       /* We use COMPARE+SETcc/Bcc, llvm uses SETcc+br cond */
+                       if (ins->opcode == OP_FCOMPARE)
+                               cmp = LLVMBuildFCmp (builder, fpcond_to_llvm_cond [rel], convert (ctx, lhs, LLVMDoubleType ()), convert (ctx, rhs, LLVMDoubleType ()), "");
+                       else if (ins->opcode == OP_COMPARE_IMM)
+                               cmp = LLVMBuildICmp (builder, cond_to_llvm_cond [rel], convert (ctx, lhs, IntPtrType ()), LLVMConstInt (IntPtrType (), ins->inst_imm, FALSE), "");
+                       else if (ins->opcode == OP_LCOMPARE_IMM) {
+                               if (SIZEOF_REGISTER == 4 && COMPILE_LLVM (cfg))  {
+                                       /* The immediate is encoded in two fields */
+                                       guint64 l = ((guint64)(guint32)ins->inst_offset << 32) | ((guint32)ins->inst_imm);
+                                       cmp = LLVMBuildICmp (builder, cond_to_llvm_cond [rel], convert (ctx, lhs, LLVMInt64Type ()), LLVMConstInt (LLVMInt64Type (), l, FALSE), "");
                                } else {
-                                       LLVM_FAILURE (ctx, "next");
+                                       cmp = LLVMBuildICmp (builder, cond_to_llvm_cond [rel], convert (ctx, lhs, LLVMInt64Type ()), LLVMConstInt (LLVMInt64Type (), ins->inst_imm, FALSE), "");
                                }
+                       }
+                       else if (ins->opcode == OP_COMPARE)
+                               cmp = LLVMBuildICmp (builder, cond_to_llvm_cond [rel], convert (ctx, lhs, IntPtrType ()), convert (ctx, rhs, IntPtrType ()), "");
+                       else
+                               cmp = LLVMBuildICmp (builder, cond_to_llvm_cond [rel], lhs, rhs, "");
 
-                               ins = ins->next;
-                               break;
+                       if (MONO_IS_COND_BRANCH_OP (ins->next)) {
+                               LLVMBuildCondBr (builder, cmp, get_bb (ctx, ins->next->inst_true_bb), get_bb (ctx, ins->next->inst_false_bb));
+                               has_terminator = TRUE;
+                       } else if (MONO_IS_SETCC (ins->next)) {
+                               sprintf (dname_buf, "t%d", ins->next->dreg);
+                               dname = dname_buf;
+                               values [ins->next->dreg] = LLVMBuildZExt (builder, cmp, LLVMInt32Type (), dname);
+
+                               /* Add stores for volatile variables */
+                               emit_volatile_store (ctx, ins->next->dreg);
+                       } else if (MONO_IS_COND_EXC (ins->next)) {
+                               emit_cond_system_exception (ctx, bb, ins->next->inst_p1, cmp);
+                               CHECK_FAILURE (ctx);
+                               builder = ctx->builder;
+                       } else {
+                               LLVM_FAILURE (ctx, "next");
                        }
+
+                       ins = ins->next;
+                       break;
+               }
                case OP_FCEQ:
                case OP_FCLT:
                case OP_FCLT_UN:
@@ -2389,6 +2413,7 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
                case OP_MOVE:
                case OP_LMOVE:
                case OP_XMOVE:
+               case OP_SETFRET:
                        g_assert (lhs);
                        values [ins->dreg] = lhs;
                        break;
@@ -2757,7 +2782,7 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
                case OP_LOCALLOC: {
                        LLVMValueRef v, size;
                                
-                       size = LLVMBuildAnd (builder, LLVMBuildAdd (builder, lhs, LLVMConstInt (LLVMInt32Type (), MONO_ARCH_FRAME_ALIGNMENT - 1, FALSE), ""), LLVMConstInt (LLVMInt32Type (), ~ (MONO_ARCH_FRAME_ALIGNMENT - 1), FALSE), "");
+                       size = LLVMBuildAnd (builder, LLVMBuildAdd (builder, convert (ctx, lhs, LLVMInt32Type ()), LLVMConstInt (LLVMInt32Type (), MONO_ARCH_FRAME_ALIGNMENT - 1, FALSE), ""), LLVMConstInt (LLVMInt32Type (), ~ (MONO_ARCH_FRAME_ALIGNMENT - 1), FALSE), "");
 
                        v = mono_llvm_build_alloca (builder, LLVMInt8Type (), size, MONO_ARCH_FRAME_ALIGNMENT, "");
 
@@ -2819,6 +2844,15 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
 
                        values [ins->dreg] = emit_load (ctx, bb, &builder, size, addr, dname, is_volatile);
 
+                       if (!is_volatile && (ins->flags & MONO_INST_CONSTANT_LOAD)) {
+                               /*
+                                * These will signal LLVM that these loads do not alias any stores, and
+                                * they can't fail, allowing them to be hoisted out of loops.
+                                */
+                               set_metadata_flag (values [ins->dreg], "mono.noalias");
+                               set_metadata_flag (values [ins->dreg], "mono.nofail.load");
+                       }
+
                        if (sext)
                                values [ins->dreg] = LLVMBuildSExt (builder, values [ins->dreg], LLVMInt32Type (), dname);
                        else if (zext)
@@ -3225,6 +3259,11 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
 
                        switch (ins->opcode) {
                        case OP_STOREV_MEMBASE:
+                               if (cfg->gen_write_barriers && klass->has_references && ins->inst_destbasereg != cfg->frame_reg) {
+                                       /* FIXME: Emit write barriers like in mini_emit_stobj () */
+                                       LLVM_FAILURE (ctx, "storev_membase + write barriers");
+                                       break;
+                               }
                                if (!addresses [ins->sreg1]) {
                                        /* SIMD */
                                        g_assert (values [ins->sreg1]);
@@ -3253,6 +3292,7 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
                        default:
                                g_assert_not_reached ();
                        }
+                       CHECK_FAILURE (ctx);
 
                        if (done)
                                break;
@@ -3391,9 +3431,15 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
                case OP_PMIND_UN:
                case OP_PMINW_UN:
                case OP_PMINB_UN:
+               case OP_PMINW:
                case OP_PMAXD_UN:
                case OP_PMAXW_UN:
-               case OP_PMAXB_UN: {
+               case OP_PMAXB_UN:
+               case OP_PCMPEQB:
+               case OP_PCMPEQW:
+               case OP_PCMPEQD:
+               case OP_PCMPEQQ:
+               case OP_PCMPGTB: {
                        LLVMValueRef args [2];
 
                        args [0] = lhs;
@@ -3412,28 +3458,17 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
                        LLVMTypeRef t;
                        gboolean zext = FALSE;
 
+                       t = simd_op_to_llvm_type (ins->opcode);
+
                        switch (ins->opcode) {
                        case OP_EXTRACT_R8:
-                               t = LLVMVectorType (LLVMDoubleType (), 2);
-                               break;
                        case OP_EXTRACT_I8:
-                               t = LLVMVectorType (LLVMInt64Type (), 2);
-                               break;
                        case OP_EXTRACT_I4:
-                               t = LLVMVectorType (LLVMInt32Type (), 4);
-                               break;
                        case OP_EXTRACT_I2:
-                               t = LLVMVectorType (LLVMInt16Type (), 8);
-                               break;
-                       case OP_EXTRACT_U2:
-                               t = LLVMVectorType (LLVMInt16Type (), 8);
-                               zext = TRUE;
-                               break;
                        case OP_EXTRACT_I1:
-                               t = LLVMVectorType (LLVMInt8Type (), 16);
                                break;
+                       case OP_EXTRACT_U2:
                        case OP_EXTRACT_U1:
-                               t = LLVMVectorType (LLVMInt8Type (), 16);
                                zext = TRUE;
                                break;
                        default:
@@ -3447,6 +3482,25 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
                                values [ins->dreg] = LLVMBuildZExt (builder, values [ins->dreg], LLVMInt32Type (), "");
                        break;
                }
+
+               case OP_EXPAND_I1:
+               case OP_EXPAND_I2:
+               case OP_EXPAND_I4:
+               case OP_EXPAND_I8:
+               case OP_EXPAND_R4:
+               case OP_EXPAND_R8: {
+                       LLVMTypeRef t = simd_op_to_llvm_type (ins->opcode);
+                       LLVMValueRef mask [16], v;
+
+                       for (i = 0; i < 16; ++i)
+                               mask [i] = LLVMConstInt (LLVMInt32Type (), 0, FALSE);
+
+                       v = convert (ctx, values [ins->sreg1], LLVMGetElementType (t));
+
+                       values [ins->dreg] = LLVMBuildInsertElement (builder, LLVMConstNull (t), v, LLVMConstInt (LLVMInt32Type (), 0, FALSE), "");
+                       values [ins->dreg] = LLVMBuildShuffleVector (builder, values [ins->dreg], LLVMGetUndef (t), LLVMConstVector (mask, LLVMGetVectorSize (t)), "");
+                       break;
+               }
 #endif
 
                case OP_DUMMY_USE:
@@ -3698,6 +3752,7 @@ mono_llvm_emit_method (MonoCompile *cfg)
        MonoMethodHeader *header;
        MonoExceptionClause *clause;
        LLVMSigInfo sinfo;
+       char **names;
 
        /* The code below might acquire the loader lock, so use it for global locking */
        mono_loader_lock ();
@@ -3790,8 +3845,7 @@ mono_llvm_emit_method (MonoCompile *cfg)
        ctx->lmethod = method;
 
 #ifdef LLVM_MONO_BRANCH
-       if (linfo->rgctx_arg)
-               LLVMSetFunctionCallConv (method, LLVMMono1CallConv);
+       LLVMSetFunctionCallConv (method, LLVMMono1CallConv);
 #endif
        LLVMSetLinkage (method, LLVMPrivateLinkage);
 
@@ -3826,16 +3880,24 @@ mono_llvm_emit_method (MonoCompile *cfg)
                values [cfg->args [0]->dreg] = LLVMGetParam (method, sinfo.this_arg_pindex);
                LLVMSetValueName (values [cfg->args [0]->dreg], "this");
        }
+
+       names = g_new (char *, sig->param_count);
+       mono_method_get_param_names (cfg->method, (const char **) names);
+
        for (i = 0; i < sig->param_count; ++i) {
                char *name;
 
                values [cfg->args [i + sig->hasthis]->dreg] = LLVMGetParam (method, sinfo.pindexes [i]);
-               name = g_strdup_printf ("arg_%d", i);
+               if (names [i] && names [i][0] != '\0')
+                       name = g_strdup_printf ("arg_%s", names [i]);
+               else
+                       name = g_strdup_printf ("arg_%d", i);
                LLVMSetValueName (values [cfg->args [i + sig->hasthis]->dreg], name);
                g_free (name);
                if (linfo->args [i + sig->hasthis].storage == LLVMArgVtypeByVal)
                        LLVMAddAttribute (LLVMGetParam (method, sinfo.pindexes [i]), LLVMByValAttribute);
        }
+       g_free (names);
 
        max_block_num = 0;
        for (bb = cfg->bb_entry; bb; bb = bb->next_bb)
@@ -4253,6 +4315,8 @@ exception_cb (void *data)
        cfg->llvm_this_reg = this_reg;
        cfg->llvm_this_offset = this_offset;
 
+       /* type_info [i] is cfg mempool allocated, no need to free it */
+
        g_free (ei);
        g_free (type_info);
 }
@@ -4380,32 +4444,42 @@ add_intrinsics (LLVMModuleRef module)
                vector_type = LLVMVectorType (LLVMInt32Type (), 4);
                arg_types [0] = vector_type;
                arg_types [1] = vector_type;
-               LLVMAddFunction (module, "llvm.x86.sse41.pminud", LLVMFunctionType (vector_type, arg_types, 2, FALSE));                                 
-               LLVMAddFunction (module, "llvm.x86.sse41.pmaxud", LLVMFunctionType (vector_type, arg_types, 2, FALSE));                                 
+               LLVMAddFunction (module, "llvm.x86.sse41.pminud", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
+               LLVMAddFunction (module, "llvm.x86.sse41.pmaxud", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
+               LLVMAddFunction (module, "llvm.x86.sse2.pcmpeq.d", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
 
                vector_type = LLVMVectorType (LLVMInt16Type (), 8);
                arg_types [0] = vector_type;
                arg_types [1] = vector_type;
-               LLVMAddFunction (module, "llvm.x86.sse41.pminuw", LLVMFunctionType (vector_type, arg_types, 2, FALSE));                                 
-               LLVMAddFunction (module, "llvm.x86.sse41.pmaxuw", LLVMFunctionType (vector_type, arg_types, 2, FALSE));                                 
+               LLVMAddFunction (module, "llvm.x86.sse41.pminuw", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
+               LLVMAddFunction (module, "llvm.x86.sse2.pmins.w", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
+               LLVMAddFunction (module, "llvm.x86.sse41.pmaxuw", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
+               LLVMAddFunction (module, "llvm.x86.sse2.pcmpeq.w", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
 
                vector_type = LLVMVectorType (LLVMInt8Type (), 16);
                arg_types [0] = vector_type;
                arg_types [1] = vector_type;
-               LLVMAddFunction (module, "llvm.x86.sse2.pminu.b", LLVMFunctionType (vector_type, arg_types, 2, FALSE));                                 
-               LLVMAddFunction (module, "llvm.x86.sse2.pmaxu.b", LLVMFunctionType (vector_type, arg_types, 2, FALSE));                                 
+               LLVMAddFunction (module, "llvm.x86.sse2.pminu.b", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
+               LLVMAddFunction (module, "llvm.x86.sse2.pmaxu.b", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
+               LLVMAddFunction (module, "llvm.x86.sse2.pcmpeq.b", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
+               LLVMAddFunction (module, "llvm.x86.sse2.pcmpgt.b", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
+
+               vector_type = LLVMVectorType (LLVMInt64Type (), 2);
+               arg_types [0] = vector_type;
+               arg_types [1] = vector_type;
+               LLVMAddFunction (module, "llvm.x86.sse41.pcmpeqq", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
 
                vector_type = LLVMVectorType (LLVMDoubleType (), 2);
                arg_types [0] = vector_type;
                arg_types [1] = vector_type;
-               LLVMAddFunction (module, "llvm.x86.sse2.min.pd", LLVMFunctionType (vector_type, arg_types, 2, FALSE));                                  
-               LLVMAddFunction (module, "llvm.x86.sse2.max.pd", LLVMFunctionType (vector_type, arg_types, 2, FALSE));                                  
+               LLVMAddFunction (module, "llvm.x86.sse2.min.pd", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
+               LLVMAddFunction (module, "llvm.x86.sse2.max.pd", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
 
                vector_type = LLVMVectorType (LLVMFloatType (), 4);
                arg_types [0] = vector_type;
                arg_types [1] = vector_type;
-               LLVMAddFunction (module, "llvm.x86.sse2.min.ps", LLVMFunctionType (vector_type, arg_types, 2, FALSE));                                  
-               LLVMAddFunction (module, "llvm.x86.sse2.max.ps", LLVMFunctionType (vector_type, arg_types, 2, FALSE));                                  
+               LLVMAddFunction (module, "llvm.x86.sse2.min.ps", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
+               LLVMAddFunction (module, "llvm.x86.sse2.max.ps", LLVMFunctionType (vector_type, arg_types, 2, FALSE));
        }
 
        /* Load/Store intrinsics */