2008-10-17 Rodrigo Kumpera <rkumpera@novell.com>
[mono.git] / mono / mini / simd-intrinsics.c
index 505320f6f2bbf255d43cdbda24691c4727591d8c..f3b0ade309f4588ea1dd9a23c0ad0dc36d4b3c0d 100644 (file)
@@ -75,11 +75,44 @@ enum {
        SIMD_EMIT_EXTRACT_MASK
 };
 
-/*This is the size of the largest method name + 1 (to fit the ending \0). Align to 4 as well.*/
-#define SIMD_INTRINSIC_NAME_MAX 20
+#ifdef HAVE_ARRAY_ELEM_INIT /* variant 1: all method names live in one struct of char arrays; each SN_* constant is a byte offset into it */
+#define MSGSTRFIELD(line) MSGSTRFIELD1(line) /* extra expansion level so __LINE__ is expanded before token pasting */
+#define MSGSTRFIELD1(line) str##line
+static const struct msgstr_t {
+#define SIMD_METHOD(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)]; /* one field per entry, sized for the string and its terminating NUL */
+#include "simd-methods.h"
+#undef SIMD_METHOD
+} method_names = {
+#define SIMD_METHOD(str,name) str, /* initialize each field with its string */
+#include "simd-methods.h"
+#undef SIMD_METHOD
+};
+/* Field names are built from __LINE__ as seen inside simd-methods.h; presumably that header has one SIMD_METHOD per line so the names are unique and identical across the three inclusions -- TODO confirm. */
+enum {
+#define SIMD_METHOD(str,name) name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
+#include "simd-methods.h"
+};
+#define method_name(idx) ((const char*)&method_names + (idx)) /* idx is a byte offset from the start of method_names */
+/* NOTE(review): within the lines shown, SIMD_METHOD is not #undef'd after either enum, and SN_LAST is declared only in the #else branch. */
+#else /* variant 2: plain array of string pointers indexed by sequentially numbered SN_* constants */
+#define SIMD_METHOD(str,name) str,
+static const char * const method_names [] = {
+#include "simd-methods.h"
+       NULL /* last element of the array */
+};
+#undef SIMD_METHOD
+#define SIMD_METHOD(str,name) name,
+enum {
+#include "simd-methods.h"
+       SN_LAST /* enumerator following all the entries from simd-methods.h */
+};
+/* Both variants expose the same accessor: method_name (SN_xxx) evaluates to the C string for that entry. */
+#define method_name(idx) (method_names [(idx)])
+
+#endif /* HAVE_ARRAY_ELEM_INIT */
 
 typedef struct {
-       const char name[SIMD_INTRINSIC_NAME_MAX];
+       guint16 name;
        guint16 opcode;
        guint8 simd_emit_mode : 4;
        guint8 simd_version : 4;
@@ -91,43 +124,43 @@ Missing:
 setters
  */
 static const SimdIntrinsc vector4f_intrinsics[] = {
-       { ".ctor", 0, SIMD_EMIT_CTOR },
-       { "AndNot", OP_ANDNPS, SIMD_EMIT_BINARY },
-       { "AddSub", OP_ADDSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
-       { "CompareEquals", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
-       { "CompareLessEqual", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LE },
-       { "CompareLessThan", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LT },
-       { "CompareNotEqual", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ },
-       { "CompareNotLessEqual", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NLE },
-       { "CompareNotLessThan", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NLT },
-       { "CompareOrdered", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_ORD },
-       { "CompareUnordered", OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_UNORD },
-       { "DuplicateHigh", OP_DUPPS_HIGH, SIMD_EMIT_UNARY, SIMD_VERSION_SSE3 },
-       { "DuplicateLow", OP_DUPPS_LOW, SIMD_EMIT_UNARY, SIMD_VERSION_SSE3 },
-       { "HorizontalAdd", OP_HADDPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
-       { "HorizontalSub", OP_HSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },    
-       { "InterleaveHigh", OP_UNPACK_HIGHPS, SIMD_EMIT_BINARY },
-       { "InterleaveLow", OP_UNPACK_LOWPS, SIMD_EMIT_BINARY },
-       { "InvSqrt", OP_RSQRTPS, SIMD_EMIT_UNARY },
-       { "LoadAligned", 0, SIMD_EMIT_LOAD_ALIGNED },
-       { "Max", OP_MAXPS, SIMD_EMIT_BINARY },
-       { "Min", OP_MINPS, SIMD_EMIT_BINARY },
-       { "Reciprocal", OP_RCPPS, SIMD_EMIT_UNARY },
-       { "Shuffle", OP_SHUFLEPS, SIMD_EMIT_SHUFFLE },
-       { "Sqrt", OP_SQRTPS, SIMD_EMIT_UNARY },
-       { "StoreAligned", 0, SIMD_EMIT_STORE_ALIGNED },
-       { "get_W", 3, SIMD_EMIT_GETTER },
-       { "get_X", 0, SIMD_EMIT_GETTER },
-       { "get_Y", 1, SIMD_EMIT_GETTER },
-       { "get_Z", 2, SIMD_EMIT_GETTER },
-       { "op_Addition", OP_ADDPS, SIMD_EMIT_BINARY },
-       { "op_BitwiseAnd", OP_ANDPS, SIMD_EMIT_BINARY },
-       { "op_BitwiseOr", OP_ORPS, SIMD_EMIT_BINARY },
-       { "op_Division", OP_DIVPS, SIMD_EMIT_BINARY },
-       { "op_ExclusiveOr", OP_XORPS, SIMD_EMIT_BINARY },
-       { "op_Explicit", 0, SIMD_EMIT_CAST }, 
-       { "op_Multiply", OP_MULPS, SIMD_EMIT_BINARY },
-       { "op_Subtraction", OP_SUBPS, SIMD_EMIT_BINARY },
+       { SN_ctor, 0, SIMD_EMIT_CTOR },
+       { SN_AndNot, OP_ANDNPS, SIMD_EMIT_BINARY },
+       { SN_AddSub, OP_ADDSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
+       { SN_CompareEquals, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
+       { SN_CompareLessEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LE },
+       { SN_CompareLessThan, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LT },
+       { SN_CompareNotEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ },
+       { SN_CompareNotLessEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NLE },
+       { SN_CompareNotLessThan, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NLT },
+       { SN_CompareOrdered, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_ORD },
+       { SN_CompareUnordered, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_UNORD },
+       { SN_DuplicateHigh, OP_DUPPS_HIGH, SIMD_EMIT_UNARY, SIMD_VERSION_SSE3 },
+       { SN_DuplicateLow, OP_DUPPS_LOW, SIMD_EMIT_UNARY, SIMD_VERSION_SSE3 },
+       { SN_HorizontalAdd, OP_HADDPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
+       { SN_HorizontalSub, OP_HSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },   
+       { SN_InterleaveHigh, OP_UNPACK_HIGHPS, SIMD_EMIT_BINARY },
+       { SN_InterleaveLow, OP_UNPACK_LOWPS, SIMD_EMIT_BINARY },
+       { SN_InvSqrt, OP_RSQRTPS, SIMD_EMIT_UNARY },
+       { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
+       { SN_Max, OP_MAXPS, SIMD_EMIT_BINARY },
+       { SN_Min, OP_MINPS, SIMD_EMIT_BINARY },
+       { SN_Reciprocal, OP_RCPPS, SIMD_EMIT_UNARY },
+       { SN_Shuffle, OP_SHUFLEPS, SIMD_EMIT_SHUFFLE },
+       { SN_Sqrt, OP_SQRTPS, SIMD_EMIT_UNARY },
+       { SN_StoreAligned, 0, SIMD_EMIT_STORE_ALIGNED },
+       { SN_get_W, 3, SIMD_EMIT_GETTER },
+       { SN_get_X, 0, SIMD_EMIT_GETTER },
+       { SN_get_Y, 1, SIMD_EMIT_GETTER },
+       { SN_get_Z, 2, SIMD_EMIT_GETTER },
+       { SN_op_Addition, OP_ADDPS, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseAnd, OP_ANDPS, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseOr, OP_ORPS, SIMD_EMIT_BINARY },
+       { SN_op_Division, OP_DIVPS, SIMD_EMIT_BINARY },
+       { SN_op_ExclusiveOr, OP_XORPS, SIMD_EMIT_BINARY },
+       { SN_op_Explicit, 0, SIMD_EMIT_CAST }, 
+       { SN_op_Multiply, OP_MULPS, SIMD_EMIT_BINARY },
+       { SN_op_Subtraction, OP_SUBPS, SIMD_EMIT_BINARY },
 };
 
 /*
@@ -137,22 +170,27 @@ getters
 setters
  */
 static const SimdIntrinsc vector4ui_intrinsics[] = {
-       { "AddWithSaturation", OP_PADDD_SAT_UN, SIMD_EMIT_BINARY },
-       { "LoadAligned", 0, SIMD_EMIT_LOAD_ALIGNED },
-       { "ShiftRightArithmetic", OP_PSARD, SIMD_EMIT_SHIFT },
-       { "StoreAligned", 0, SIMD_EMIT_STORE_ALIGNED },
-       { "SubWithSaturation", OP_PSUBD_SAT_UN, SIMD_EMIT_BINARY },
-       { "UnpackHigh", OP_UNPACK_HIGHD, SIMD_EMIT_BINARY },
-       { "UnpackLow", OP_UNPACK_LOWD, SIMD_EMIT_BINARY },
-       { "op_Addition", OP_PADDD, SIMD_EMIT_BINARY },
-       { "op_BitwiseAnd", OP_PAND, SIMD_EMIT_BINARY },
-       { "op_BitwiseOr", OP_POR, SIMD_EMIT_BINARY },
-       { "op_BitwiseXor", OP_PXOR, SIMD_EMIT_BINARY },
-       { "op_Explicit", 0, SIMD_EMIT_CAST },
-       { "op_LeftShift", OP_PSHLD, SIMD_EMIT_SHIFT },
-       { "op_Multiply", OP_PMULD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
-       { "op_RightShift", OP_PSHRD, SIMD_EMIT_SHIFT },
-       { "op_Subtraction", OP_PSUBD, SIMD_EMIT_BINARY },
+       { SN_AddWithSaturation, OP_PADDD_SAT_UN, SIMD_EMIT_BINARY }, /* entries are listed in ascending strcmp order of the method names; keep new entries in that order */
+       { SN_CompareEqual, OP_PCMPEQD, SIMD_EMIT_BINARY },
+       { SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK },
+       { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
+       { SN_Max, OP_PMAXD_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41},
+       { SN_Min, OP_PMIND_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
+       { SN_ShiftRightArithmetic, OP_PSARD, SIMD_EMIT_SHIFT },
+       { SN_Shuffle, OP_PSHUFLED, SIMD_EMIT_SHUFFLE },
+       { SN_StoreAligned, 0, SIMD_EMIT_STORE_ALIGNED },
+       { SN_SubWithSaturation, OP_PSUBD_SAT_UN, SIMD_EMIT_BINARY },
+       { SN_UnpackHigh, OP_UNPACK_HIGHD, SIMD_EMIT_BINARY },
+       { SN_UnpackLow, OP_UNPACK_LOWD, SIMD_EMIT_BINARY },
+       { SN_op_Addition, OP_PADDD, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseXor, OP_PXOR, SIMD_EMIT_BINARY }, /* NOTE(review): vector4f_intrinsics names its XOR entry SN_op_ExclusiveOr; confirm which operator method name the managed type actually exposes */
+       { SN_op_Explicit, 0, SIMD_EMIT_CAST },
+       { SN_op_LeftShift, OP_PSHLD, SIMD_EMIT_SHIFT },
+       { SN_op_Multiply, OP_PMULD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
+       { SN_op_RightShift, OP_PSHRD, SIMD_EMIT_SHIFT },
+       { SN_op_Subtraction, OP_PSUBD, SIMD_EMIT_BINARY },
 };
 
 /*
@@ -162,26 +200,28 @@ getters
 setters
  */
 static const SimdIntrinsc vector8us_intrinsics[] = {
-       { "AddWithSaturation", OP_PADDW_SAT_UN, SIMD_EMIT_BINARY },
-       { "Average", OP_PAVGW_UN, SIMD_EMIT_BINARY },
-       { "ExtractByteMask", 0, SIMD_EMIT_EXTRACT_MASK },
-       { "LoadAligned", 0, SIMD_EMIT_LOAD_ALIGNED },
-       { "ShiftRightArithmetic", OP_PSARW, SIMD_EMIT_SHIFT },
-       { "ShuffleHigh", OP_PSHUFLEW_HIGH, SIMD_EMIT_SHUFFLE },
-       { "ShuffleLow", OP_PSHUFLEW_LOW, SIMD_EMIT_SHUFFLE },
-       { "StoreAligned", 0, SIMD_EMIT_STORE_ALIGNED },
-       { "SubWithSaturation", OP_PSUBW_SAT_UN, SIMD_EMIT_BINARY },
-       { "UnpackHigh", OP_UNPACK_HIGHW, SIMD_EMIT_BINARY },
-       { "UnpackLow", OP_UNPACK_LOWW, SIMD_EMIT_BINARY },
-       { "op_Addition", OP_PADDW, SIMD_EMIT_BINARY },
-       { "op_BitwiseAnd", OP_PAND, SIMD_EMIT_BINARY },
-       { "op_BitwiseOr", OP_POR, SIMD_EMIT_BINARY },
-       { "op_BitwiseXor", OP_PXOR, SIMD_EMIT_BINARY },
-       { "op_Explicit", 0, SIMD_EMIT_CAST },
-       { "op_LeftShift", OP_PSHLW, SIMD_EMIT_SHIFT },
-       { "op_Multiply", OP_PMULW, SIMD_EMIT_BINARY },
-       { "op_RightShift", OP_PSHRW, SIMD_EMIT_SHIFT },
-       { "op_Subtraction", OP_PSUBW, SIMD_EMIT_BINARY },
+       { SN_AddWithSaturation, OP_PADDW_SAT_UN, SIMD_EMIT_BINARY }, /* entries are listed in ascending strcmp order of the method names; keep new entries in that order */
+       { SN_Average, OP_PAVGW_UN, SIMD_EMIT_BINARY },
+       { SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK },
+       { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
+       { SN_Max, OP_PMAXW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41},
+       { SN_Min, OP_PMINW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
+       { SN_ShiftRightArithmetic, OP_PSARW, SIMD_EMIT_SHIFT },
+       { SN_ShuffleHigh, OP_PSHUFLEW_HIGH, SIMD_EMIT_SHUFFLE },
+       { SN_ShuffleLow, OP_PSHUFLEW_LOW, SIMD_EMIT_SHUFFLE },
+       { SN_StoreAligned, 0, SIMD_EMIT_STORE_ALIGNED },
+       { SN_SubWithSaturation, OP_PSUBW_SAT_UN, SIMD_EMIT_BINARY },
+       { SN_UnpackHigh, OP_UNPACK_HIGHW, SIMD_EMIT_BINARY },
+       { SN_UnpackLow, OP_UNPACK_LOWW, SIMD_EMIT_BINARY },
+       { SN_op_Addition, OP_PADDW, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseXor, OP_PXOR, SIMD_EMIT_BINARY }, /* NOTE(review): vector4f_intrinsics names its XOR entry SN_op_ExclusiveOr; confirm which operator method name the managed type actually exposes */
+       { SN_op_Explicit, 0, SIMD_EMIT_CAST },
+       { SN_op_LeftShift, OP_PSHLW, SIMD_EMIT_SHIFT },
+       { SN_op_Multiply, OP_PMULW, SIMD_EMIT_BINARY },
+       { SN_op_RightShift, OP_PSHRW, SIMD_EMIT_SHIFT },
+       { SN_op_Subtraction, OP_PSUBW, SIMD_EMIT_BINARY },
 };
 
 /*
@@ -191,18 +231,24 @@ getters
 setters
  */
 static const SimdIntrinsc vector16b_intrinsics[] = {
-       { "AddWithSaturation", OP_PADDB_SAT_UN, SIMD_EMIT_BINARY },
-       { "LoadAligned", 0, SIMD_EMIT_LOAD_ALIGNED },
-       { "StoreAligned", 0, SIMD_EMIT_STORE_ALIGNED },
-       { "SubWithSaturation", OP_PSUBB_SAT_UN, SIMD_EMIT_BINARY },
-       { "UnpackHigh", OP_UNPACK_HIGHB, SIMD_EMIT_BINARY },
-       { "UnpackLow", OP_UNPACK_LOWB, SIMD_EMIT_BINARY },
-       { "op_Addition", OP_PADDB, SIMD_EMIT_BINARY },
-       { "op_BitwiseAnd", OP_PAND, SIMD_EMIT_BINARY },
-       { "op_BitwiseOr", OP_POR, SIMD_EMIT_BINARY },
-       { "op_BitwiseXor", OP_PXOR, SIMD_EMIT_BINARY },
-       { "op_Explicit", 0, SIMD_EMIT_CAST },
-       { "op_Subtraction", OP_PSUBB, SIMD_EMIT_BINARY },
+       { SN_AddWithSaturation, OP_PADDB_SAT_UN, SIMD_EMIT_BINARY }, /* entries are listed in ascending strcmp order of the method names; keep new entries in that order */
+       { SN_Average, OP_PAVGB_UN, SIMD_EMIT_BINARY },
+       { SN_CompareEqual, OP_PCMPEQB, SIMD_EMIT_BINARY },
+       { SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK },
+       { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
+       { SN_Max, OP_PMAXB_UN, SIMD_EMIT_BINARY },
+       { SN_Min, OP_PMINB_UN, SIMD_EMIT_BINARY },
+       { SN_StoreAligned, 0, SIMD_EMIT_STORE_ALIGNED },
+       { SN_SubWithSaturation, OP_PSUBB_SAT_UN, SIMD_EMIT_BINARY },
+       { SN_SumOfAbsoluteDifferences, OP_PSUM_ABS_DIFF, SIMD_EMIT_BINARY },
+       { SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_EMIT_BINARY },
+       { SN_UnpackLow, OP_UNPACK_LOWB, SIMD_EMIT_BINARY },
+       { SN_op_Addition, OP_PADDB, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY },
+       { SN_op_BitwiseXor, OP_PXOR, SIMD_EMIT_BINARY }, /* NOTE(review): vector4f_intrinsics names its XOR entry SN_op_ExclusiveOr; confirm which operator method name the managed type actually exposes */
+       { SN_op_Explicit, 0, SIMD_EMIT_CAST },
+       { SN_op_Subtraction, OP_PSUBB, SIMD_EMIT_BINARY },
 };
 
 static guint32 simd_supported_versions;
@@ -211,7 +257,7 @@ static guint32 simd_supported_versions;
 static int
 simd_intrinsic_compare_by_name (const void *key, const void *value)
 {
-       return strncmp(key, ((SimdIntrinsc *)value)->name, SIMD_INTRINSIC_NAME_MAX);
+       return strcmp (key, method_name (((SimdIntrinsc *)value)->name));
 }
 
 typedef enum {
@@ -222,23 +268,44 @@ typedef enum {
        VREG_MANY_BB_USE                = 0x10,
 } KillFlags;
 
-static inline int
-get_ins_reg_by_idx (MonoInst *ins, int idx)
-{
-       switch (idx) {
-       case 0: return ins->dreg;
-       case 1: return ins->sreg1;
-       case 2: return ins->sreg2;
-       }
-       return -1;
-}
-
 void
 mono_simd_intrinsics_init (void)
 {
        simd_supported_versions = mono_arch_cpu_enumerate_simd_versions ();
        /*TODO log the supported flags*/
 }
+/* If reg is a tracked vreg (not -1, <= max_vreg, non-zero flags): clear VREG_HAS_XZERO_BB0, set VREG_HAS_OTHER_OP_BB0 and return TRUE; otherwise leave vreg_flags untouched and return FALSE. */
+static inline gboolean
+apply_vreg_first_block_interference (MonoCompile *cfg, MonoInst *ins, int reg, int max_vreg, char *vreg_flags)
+{
+       if (reg != -1 && reg <= max_vreg && vreg_flags [reg]) { /* vreg_flags is indexed by vreg number; a zero entry means the vreg is not being tracked */
+               vreg_flags [reg] &= ~VREG_HAS_XZERO_BB0;
+               vreg_flags [reg] |= VREG_HAS_OTHER_OP_BB0;
+               DEBUG (printf ("[simd-simplify] R%d used: ", reg); mono_print_ins(ins)); /* cfg is not referenced directly in this body; presumably the DEBUG macro reads it -- TODO confirm */
+               return TRUE;
+       }
+       return FALSE;
+}
+/* For a vreg still flagged VREG_HAS_XZERO_BB0, track whether it is used by one block or by several: the first use records bb in target_bb and sets VREG_SINGLE_BB_USE; a use from a different bb switches it to VREG_MANY_BB_USE. Returns TRUE only when the flags were changed. */
+static inline gboolean
+apply_vreg_following_block_interference (MonoCompile *cfg, MonoInst *ins, int reg, MonoBasicBlock *bb, int max_vreg, char *vreg_flags, MonoBasicBlock **target_bb)
+{
+       if (reg == -1 || reg > max_vreg || !(vreg_flags [reg] & VREG_HAS_XZERO_BB0) || target_bb [reg] == bb) /* nothing to do: no register, out of range, not a candidate, or already recorded for this same block */
+               return FALSE;
+
+       if (vreg_flags [reg] & VREG_SINGLE_BB_USE) { /* a different block was recorded earlier, so this is a second block */
+               vreg_flags [reg] &= ~VREG_SINGLE_BB_USE;
+               vreg_flags [reg] |= VREG_MANY_BB_USE;
+               DEBUG (printf ("[simd-simplify] R%d used by many bb: ", reg); mono_print_ins(ins));
+               return TRUE;
+       } else if (!(vreg_flags [reg] & VREG_MANY_BB_USE)) { /* neither use flag set yet: first use seen */
+               vreg_flags [reg] |= VREG_SINGLE_BB_USE;
+               target_bb [reg] = bb;
+               DEBUG (printf ("[simd-simplify] R%d first used by: ", reg); mono_print_ins(ins));
+               return TRUE;
+       }
+       return FALSE; /* already VREG_MANY_BB_USE: nothing changes */
+}
 /*
 This pass recalculate which vars need MONO_INST_INDIRECT.
 
@@ -251,12 +318,11 @@ mono_simd_simplify_indirection (MonoCompile *cfg)
        int i, max_vreg = 0;
        MonoBasicBlock *bb, *first_bb = NULL, **target_bb;
        MonoInst *ins;
-       char * vreg_flags;
+       char *vreg_flags;
 
        for (i = 0; i < cfg->num_varinfo; i++) {
                MonoInst *var = cfg->varinfo [i];
                if (var->klass->simd_type) {
-                       // printf ("cleaning indirect flag for %d\n", var->dreg);
                        var->flags &= ~MONO_INST_INDIRECT;
                        max_vreg = MAX (var->dreg, max_vreg);
                }
@@ -296,14 +362,15 @@ mono_simd_simplify_indirection (MonoCompile *cfg)
                        }
                        continue;
                }
-               for (i = 0; i < 3; ++i) {
-                       int reg = get_ins_reg_by_idx (ins, i);
-                       if (reg != -1 && reg <= max_vreg && vreg_flags [reg]) {
-                               vreg_flags [reg] &= ~VREG_HAS_XZERO_BB0;
-                               vreg_flags [reg] |= VREG_HAS_OTHER_OP_BB0;
-                               DEBUG (printf ("[simd-simplify] R%d used: ", reg); mono_print_ins(ins));
-                       }
-               }
+               if (ins->opcode == OP_LDADDR && apply_vreg_first_block_interference (cfg, ins, ((MonoInst*)ins->inst_p0)->dreg, max_vreg, vreg_flags))
+                       continue;
+               
+               if (apply_vreg_first_block_interference (cfg, ins, ins->dreg, max_vreg, vreg_flags))
+                       continue;
+               if (apply_vreg_first_block_interference (cfg, ins, ins->sreg1, max_vreg, vreg_flags))
+                       continue;
+               if (apply_vreg_first_block_interference (cfg, ins, ins->sreg2, max_vreg, vreg_flags))
+                       continue;
        }
 
        if (IS_DEBUG_ON (cfg)) {
@@ -333,23 +400,15 @@ mono_simd_simplify_indirection (MonoCompile *cfg)
 
        for (bb = first_bb->next_bb; bb; bb = bb->next_bb) {
                for (ins = bb->code; ins; ins = ins->next) {
-                       for (i = 0; i < 3; ++i) {
-                               int reg = get_ins_reg_by_idx (ins, i);
-                               if (reg == -1 || reg > max_vreg || !(vreg_flags [reg] & VREG_HAS_XZERO_BB0) || target_bb [reg] == bb)
-                                       continue;
-
-                               if (vreg_flags [reg] & VREG_SINGLE_BB_USE) {
-                                       vreg_flags [reg] &= ~VREG_SINGLE_BB_USE;
-                                       vreg_flags [reg] |= VREG_MANY_BB_USE;
-                                       DEBUG (printf ("[simd-simplify] R%d used by many bb: ", reg); mono_print_ins(ins));
-                                       break;
-                               } else if (!(vreg_flags [reg] & VREG_MANY_BB_USE)) {
-                                       vreg_flags [reg] |= VREG_SINGLE_BB_USE;
-                                       target_bb [reg] = bb;
-                                       DEBUG (printf ("[simd-simplify] R%d first used by: ", reg); mono_print_ins(ins));
-                                       break;
-                               }
-                       }
+                       
+                       if (ins->opcode == OP_LDADDR && apply_vreg_following_block_interference (cfg, ins, ((MonoInst*)ins->inst_p0)->dreg, bb, max_vreg, vreg_flags, target_bb))
+                               continue;
+                       if (apply_vreg_following_block_interference (cfg, ins, ins->dreg, bb, max_vreg, vreg_flags, target_bb))
+                               continue;
+                       if (apply_vreg_following_block_interference (cfg, ins, ins->sreg1, bb, max_vreg, vreg_flags, target_bb))
+                               continue;
+                       if (apply_vreg_following_block_interference (cfg, ins, ins->sreg2, bb, max_vreg, vreg_flags, target_bb))
+                               continue;
                }
        }
 
@@ -722,7 +781,7 @@ emit_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
        }
        if (IS_DEBUG_ON (cfg)) {
                int i, max;
-               printf ("found call to intrinsic %s::%s/%d -> %s\n", cmethod->klass->name, cmethod->name, fsig->param_count, result->name);
+               printf ("found call to intrinsic %s::%s/%d -> %s\n", cmethod->klass->name, cmethod->name, fsig->param_count, method_name (result->name));
                max = fsig->param_count + fsig->hasthis;
                for (i = 0; i < max; ++i) {
                        printf ("param %d:  ", i);