2010-02-02 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / mini / simd-intrinsics.c
index c81619d7cc4688417de834b2d0b7c87813b8bdb9..abbaf4e37789cf2c0e84fa278485b205717f440b 100644 (file)
@@ -127,7 +127,7 @@ typedef struct {
 } SimdIntrinsc;
 
 static const SimdIntrinsc vector4f_intrinsics[] = {
-       { SN_ctor, 0, SIMD_EMIT_CTOR },
+       { SN_ctor, OP_EXPAND_R4, SIMD_EMIT_CTOR },
        { SN_AddSub, OP_ADDSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
        { SN_AndNot, OP_ANDNPS, SIMD_EMIT_BINARY },
        { SN_CompareEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
@@ -178,7 +178,7 @@ static const SimdIntrinsc vector4f_intrinsics[] = {
 };
 
 static const SimdIntrinsc vector2d_intrinsics[] = {
-       { SN_ctor, 0, SIMD_EMIT_CTOR },
+       { SN_ctor, OP_EXPAND_R8, SIMD_EMIT_CTOR },
        { SN_AddSub, OP_ADDSUBPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 },
        { SN_AndNot, OP_ANDNPD, SIMD_EMIT_BINARY },
        { SN_CompareEqual, OP_COMPPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ },
@@ -201,6 +201,7 @@ static const SimdIntrinsc vector2d_intrinsics[] = {
        { SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 },
        { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 },
        { SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA },
+       { SN_Sqrt, OP_SQRTPD, SIMD_EMIT_UNARY },
        { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE },
        { SN_get_X, 0, SIMD_EMIT_GETTER_QWORD },
        { SN_get_Y, 1, SIMD_EMIT_GETTER_QWORD },
@@ -217,7 +218,7 @@ static const SimdIntrinsc vector2d_intrinsics[] = {
 };
 
 static const SimdIntrinsc vector2ul_intrinsics[] = {
-       { SN_ctor, 0, SIMD_EMIT_CTOR },
+       { SN_ctor, OP_EXPAND_I8, SIMD_EMIT_CTOR },
        { SN_CompareEqual, OP_PCMPEQQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
        { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
        { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 },
@@ -243,7 +244,7 @@ static const SimdIntrinsc vector2ul_intrinsics[] = {
 };
 
 static const SimdIntrinsc vector2l_intrinsics[] = {
-       { SN_ctor, 0, SIMD_EMIT_CTOR },
+       { SN_ctor, OP_EXPAND_I8, SIMD_EMIT_CTOR },
        { SN_CompareEqual, OP_PCMPEQQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 },
        { SN_CompareGreaterThan, OP_PCMPGTQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE42 },
        { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
@@ -270,7 +271,7 @@ static const SimdIntrinsc vector2l_intrinsics[] = {
 };
 
 static const SimdIntrinsc vector4ui_intrinsics[] = {
-       { SN_ctor, 0, SIMD_EMIT_CTOR },
+       { SN_ctor, OP_EXPAND_I4, SIMD_EMIT_CTOR },
        { SN_ArithmeticRightShift, OP_PSARD, SIMD_EMIT_SHIFT },
        { SN_CompareEqual, OP_PCMPEQD, SIMD_EMIT_BINARY },
        { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
@@ -308,7 +309,7 @@ static const SimdIntrinsc vector4ui_intrinsics[] = {
 };
 
 static const SimdIntrinsc vector4i_intrinsics[] = {
-       { SN_ctor, 0, SIMD_EMIT_CTOR },
+       { SN_ctor, OP_EXPAND_I4, SIMD_EMIT_CTOR },
        { SN_CompareEqual, OP_PCMPEQD, SIMD_EMIT_BINARY },
        { SN_CompareGreaterThan, OP_PCMPGTD, SIMD_EMIT_BINARY },
        { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED },
@@ -347,7 +348,7 @@ static const SimdIntrinsc vector4i_intrinsics[] = {
 };
 
 static const SimdIntrinsc vector8us_intrinsics[] = {
-       { SN_ctor, 0, SIMD_EMIT_CTOR },
+       { SN_ctor, OP_EXPAND_I2, SIMD_EMIT_CTOR },
        { SN_AddWithSaturation, OP_PADDW_SAT_UN, SIMD_EMIT_BINARY },
        { SN_ArithmeticRightShift, OP_PSARW, SIMD_EMIT_SHIFT },
        { SN_Average, OP_PAVGW_UN, SIMD_EMIT_BINARY },
@@ -398,7 +399,7 @@ static const SimdIntrinsc vector8us_intrinsics[] = {
 };
 
 static const SimdIntrinsc vector8s_intrinsics[] = {
-       { SN_ctor, 0, SIMD_EMIT_CTOR },
+       { SN_ctor, OP_EXPAND_I2, SIMD_EMIT_CTOR },
        { SN_AddWithSaturation, OP_PADDW_SAT, SIMD_EMIT_BINARY },
        { SN_CompareEqual, OP_PCMPEQW, SIMD_EMIT_BINARY },
        { SN_CompareGreaterThan, OP_PCMPGTW, SIMD_EMIT_BINARY },
@@ -449,7 +450,7 @@ static const SimdIntrinsc vector8s_intrinsics[] = {
 };
 
 static const SimdIntrinsc vector16b_intrinsics[] = {
-       { SN_ctor, 0, SIMD_EMIT_CTOR },
+       { SN_ctor, OP_EXPAND_I1, SIMD_EMIT_CTOR },
        { SN_AddWithSaturation, OP_PADDB_SAT_UN, SIMD_EMIT_BINARY },
        { SN_Average, OP_PAVGB_UN, SIMD_EMIT_BINARY },
        { SN_CompareEqual, OP_PCMPEQB, SIMD_EMIT_BINARY },
@@ -513,7 +514,7 @@ Missing:
 setters
  */
 static const SimdIntrinsc vector16sb_intrinsics[] = {
-       { SN_ctor, 0, SIMD_EMIT_CTOR },
+       { SN_ctor, OP_EXPAND_I1, SIMD_EMIT_CTOR },
        { SN_AddWithSaturation, OP_PADDB_SAT, SIMD_EMIT_BINARY },
        { SN_CompareEqual, OP_PCMPEQB, SIMD_EMIT_BINARY },
        { SN_CompareGreaterThan, OP_PCMPGTB, SIMD_EMIT_BINARY },
@@ -676,6 +677,9 @@ mono_simd_simplify_indirection (MonoCompile *cfg)
 
        /*Scan the first basic block looking xzeros not used*/
        for (ins = first_bb->code; ins; ins = ins->next) {
+               int num_sregs;
+               int sregs [MONO_MAX_SRC_REGS];
+
                if (ins->opcode == OP_XZERO) {
                        if (!(vreg_flags [ins->dreg] & VREG_HAS_OTHER_OP_BB0)) {
                                DEBUG (printf ("[simd-simplify] R%d has vzero: ", ins->dreg); mono_print_ins(ins));
@@ -685,13 +689,13 @@ mono_simd_simplify_indirection (MonoCompile *cfg)
                }
                if (ins->opcode == OP_LDADDR && apply_vreg_first_block_interference (cfg, ins, ((MonoInst*)ins->inst_p0)->dreg, max_vreg, vreg_flags))
                        continue;
-               
                if (apply_vreg_first_block_interference (cfg, ins, ins->dreg, max_vreg, vreg_flags))
                        continue;
-               if (apply_vreg_first_block_interference (cfg, ins, ins->sreg1, max_vreg, vreg_flags))
-                       continue;
-               if (apply_vreg_first_block_interference (cfg, ins, ins->sreg2, max_vreg, vreg_flags))
-                       continue;
+               num_sregs = mono_inst_get_src_registers (ins, sregs);
+               for (i = 0; i < num_sregs; ++i) {
+                       if (apply_vreg_first_block_interference (cfg, ins, sregs [i], max_vreg, vreg_flags))
+                               break;
+               }
        }
 
        if (IS_DEBUG_ON (cfg)) {
@@ -721,15 +725,19 @@ mono_simd_simplify_indirection (MonoCompile *cfg)
 
        for (bb = first_bb->next_bb; bb; bb = bb->next_bb) {
                for (ins = bb->code; ins; ins = ins->next) {
-                       
+                       int num_sregs;
+                       int sregs [MONO_MAX_SRC_REGS];
+
                        if (ins->opcode == OP_LDADDR && apply_vreg_following_block_interference (cfg, ins, ((MonoInst*)ins->inst_p0)->dreg, bb, max_vreg, vreg_flags, target_bb))
                                continue;
                        if (apply_vreg_following_block_interference (cfg, ins, ins->dreg, bb, max_vreg, vreg_flags, target_bb))
                                continue;
-                       if (apply_vreg_following_block_interference (cfg, ins, ins->sreg1, bb, max_vreg, vreg_flags, target_bb))
-                               continue;
-                       if (apply_vreg_following_block_interference (cfg, ins, ins->sreg2, bb, max_vreg, vreg_flags, target_bb))
-                               continue;
+                       num_sregs = mono_inst_get_src_registers (ins, sregs);
+                       for (i = 0; i < num_sregs; ++i) {
+                               if (apply_vreg_following_block_interference (cfg, ins, sregs [i], bb,
+                                               max_vreg, vreg_flags, target_bb))
+                                       continue;
+                       }
                }
        }
 
@@ -745,10 +753,19 @@ mono_simd_simplify_indirection (MonoCompile *cfg)
                if (!(vreg_flags [var->dreg] & VREG_SINGLE_BB_USE))
                        continue;
                for (ins = target_bb [var->dreg]->code; ins; ins = ins->next) {
+                       int num_sregs, j;
+                       int sregs [MONO_MAX_SRC_REGS];
+                       gboolean found = FALSE; /* TRUE once var->dreg is seen among the registers returned by mono_inst_get_src_registers */
+
+                       num_sregs = mono_inst_get_src_registers (ins, sregs);
+                       for (j = 0; j < num_sregs; ++j) {
+                               if (sregs [j] == var->dreg) /* index with j, this loop's counter (was mistakenly i) */
+                                       found = TRUE;
+                       }
                        /*We can avoid inserting the XZERO if the first use doesn't depend on the zero'ed value.*/
-                       if (ins->dreg == var->dreg && ins->sreg1 != var->dreg && ins->sreg2 != var->dreg) {
+                       if (ins->dreg == var->dreg && !found) {
                                break;
-                       } else if (ins->sreg1 == var->dreg || ins->sreg2 == var->dreg) {
+                       } else if (found) {
                                MonoInst *tmp;
                                MONO_INST_NEW (cfg, tmp, OP_XZERO);
                                tmp->dreg = var->dreg;
@@ -789,8 +806,10 @@ get_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src)
  * This function will load the value if needed. 
  */
 static int
-load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src)
+load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean *indirect)
 {
+       if (indirect)
+               *indirect = FALSE;
        if (src->opcode == OP_XMOVE) {
                return src->sreg1;
        } else if (src->opcode == OP_LDADDR) {
@@ -801,6 +820,8 @@ load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src)
                return src->dreg;
        } else if (src->type == STACK_PTR || src->type == STACK_MP) {
                MonoInst *ins;
+               if (indirect)
+                       *indirect = TRUE;
 
                MONO_INST_NEW (cfg, ins, OP_LOADX_MEMBASE);
                ins->klass = cmethod->klass;
@@ -953,13 +974,16 @@ simd_intrinsic_emit_setter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mon
        MonoInst *ins;
        MonoMethodSignature *sig = mono_method_signature (cmethod);
        int size, align;
+       gboolean indirect;
+       int dreg;
+
        size = mono_type_size (sig->params [0], &align); 
 
        if (size == 2 || size == 4 || size == 8) {
                MONO_INST_NEW (cfg, ins, mono_type_to_slow_insert_op (sig->params [0]));
                ins->klass = cmethod->klass;
                /*This is a partial load so we encode the dependency on the previous value by setting dreg and sreg1 to the same value.*/
-               ins->dreg = ins->sreg1 = load_simd_vreg (cfg, cmethod, args [0]);
+               ins->dreg = ins->sreg1 = dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);
                ins->sreg2 = args [1]->dreg;
                ins->inst_c0 = intrinsic->opcode;
                if (sig->params [0]->type == MONO_TYPE_R4)
@@ -972,7 +996,7 @@ simd_intrinsic_emit_setter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mon
 
                MONO_INST_NEW (cfg, ins, OP_EXTRACTX_U2);
                ins->klass = cmethod->klass;
-               ins->sreg1 = sreg = load_simd_vreg (cfg, cmethod, args [0]);
+               ins->sreg1 = sreg = dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);
                ins->type = STACK_I4;
                ins->dreg = vreg = alloc_ireg (cfg);
                ins->inst_c0 = intrinsic->opcode / 2;
@@ -985,7 +1009,14 @@ simd_intrinsic_emit_setter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mon
                ins->dreg = sreg;
                ins->inst_c0 = intrinsic->opcode;
                MONO_ADD_INS (cfg->cbb, ins);
+       }
 
+       if (indirect) {
+               MONO_INST_NEW (cfg, ins, OP_STOREX_MEMBASE);
+               ins->klass = cmethod->klass;
+               ins->dreg = args [0]->dreg;
+               ins->sreg1 = dreg;
+               MONO_ADD_INS (cfg->cbb, ins);
        }
        return ins;
 }
@@ -997,7 +1028,7 @@ simd_intrinsic_emit_getter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mon
        MonoMethodSignature *sig = mono_method_signature (cmethod);
        int vreg, shift_bits = mono_type_elements_shift_bits (sig->ret);
 
-       vreg = load_simd_vreg (cfg, cmethod, args [0]);
+       vreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
 
        if (intrinsic->opcode >> shift_bits) {
                MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
@@ -1036,7 +1067,7 @@ simd_intrinsic_emit_long_getter (const SimdIntrinsc *intrinsic, MonoCompile *cfg
        int vreg;
        gboolean is_r8 = mono_method_signature (cmethod)->ret->type == MONO_TYPE_R8;
 
-       vreg = load_simd_vreg (cfg, cmethod, args [0]);
+       vreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
 
        MONO_INST_NEW (cfg, ins, is_r8 ? OP_EXTRACT_R8 : OP_EXTRACT_I8);
        ins->klass = cmethod->klass;
@@ -1065,6 +1096,38 @@ simd_intrinsic_emit_ctor (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoM
        int store_op = mono_type_to_store_membase (cfg, sig->params [0]);
        int arg_size = mono_type_size (sig->params [0], &i);
 
+       if (sig->param_count == 1) {
+               int dreg;
+               
+               if (is_ldaddr) {
+                       dreg = args [0]->inst_i0->dreg;
+                       NULLIFY_INS (args [0]);
+               } else {
+                       g_assert (args [0]->type == STACK_MP || args [0]->type == STACK_PTR);
+                       dreg = alloc_ireg (cfg);
+               }
+
+               MONO_INST_NEW (cfg, ins, intrinsic->opcode);
+               ins->klass = cmethod->klass;
+               ins->sreg1 = args [1]->dreg;
+               ins->type = STACK_VTYPE;
+               ins->dreg = dreg;
+
+               MONO_ADD_INS (cfg->cbb, ins);
+               if (sig->params [0]->type == MONO_TYPE_R4)
+                       ins->backend.spill_var = get_int_to_float_spill_area (cfg);
+               else if (sig->params [0]->type == MONO_TYPE_R8)
+                       ins->backend.spill_var = get_double_spill_area (cfg);
+
+               if (!is_ldaddr) {
+                       MONO_INST_NEW (cfg, ins, OP_STOREX_MEMBASE);
+                       ins->dreg = args [0]->dreg;
+                       ins->sreg1 = dreg;
+                       MONO_ADD_INS (cfg->cbb, ins);
+               }
+               return ins;
+       }
+
        if (is_ldaddr) {
                NEW_VARLOADA (cfg, ins, get_simd_ctor_spill_area (cfg, cmethod->klass), &cmethod->klass->byref_arg);
                MONO_ADD_INS (cfg->cbb, ins);
@@ -1201,8 +1264,8 @@ simd_intrinsic_emit_shuffle (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mo
        /*TODO Exposing shuffle is not a good thing as it's non obvious. We should come up with better abstractions*/
 
        if (args [1]->opcode != OP_ICONST) {
-               g_warning ("Shuffle with non literals is not yet supported");
-               g_assert_not_reached ();
+               /*TODO Shuffle with non literals is not yet supported */
+               return NULL;
        }
        vreg = get_simd_vreg (cfg, cmethod, args [0]);
        NULLIFY_INS (args [1]);
@@ -1452,37 +1515,45 @@ emit_simd_runtime_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodS
 MonoInst*
 mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
 {
+       const char *class_name;
+
        if (strcmp ("Mono.Simd", cmethod->klass->name_space))
                return NULL;
-       
-       if (!strcmp ("SimdRuntime", cmethod->klass->name))
+
+       class_name = cmethod->klass->name;
+       if (!strcmp ("SimdRuntime", class_name))
                return emit_simd_runtime_intrinsics (cfg, cmethod, fsig, args);
 
-       if (!strcmp ("ArrayExtensions", cmethod->klass->name))
+       if (!strcmp ("ArrayExtensions", class_name))
                return emit_array_extension_intrinsics (cfg, cmethod, fsig, args);
        
-       if (!cmethod->klass->simd_type)
+       if (!strcmp ("VectorOperations", class_name)) {
+               if (!(cmethod->flags & METHOD_ATTRIBUTE_STATIC))
+                       return NULL;
+               class_name = mono_class_from_mono_type (mono_method_signature (cmethod)->params [0])->name;
+       } else if (!cmethod->klass->simd_type)
                return NULL;
+
        cfg->uses_simd_intrinsics = 1;
-       if (!strcmp ("Vector2d", cmethod->klass->name))
+       if (!strcmp ("Vector2d", class_name))
                return emit_intrinsics (cfg, cmethod, fsig, args, vector2d_intrinsics, sizeof (vector2d_intrinsics) / sizeof (SimdIntrinsc));
-       if (!strcmp ("Vector4f", cmethod->klass->name))
+       if (!strcmp ("Vector4f", class_name))
                return emit_intrinsics (cfg, cmethod, fsig, args, vector4f_intrinsics, sizeof (vector4f_intrinsics) / sizeof (SimdIntrinsc));
-       if (!strcmp ("Vector2ul", cmethod->klass->name))
+       if (!strcmp ("Vector2ul", class_name))
                return emit_intrinsics (cfg, cmethod, fsig, args, vector2ul_intrinsics, sizeof (vector2ul_intrinsics) / sizeof (SimdIntrinsc));
-       if (!strcmp ("Vector2l", cmethod->klass->name))
+       if (!strcmp ("Vector2l", class_name))
                return emit_intrinsics (cfg, cmethod, fsig, args, vector2l_intrinsics, sizeof (vector2l_intrinsics) / sizeof (SimdIntrinsc));
-       if (!strcmp ("Vector4ui", cmethod->klass->name))
+       if (!strcmp ("Vector4ui", class_name))
                return emit_intrinsics (cfg, cmethod, fsig, args, vector4ui_intrinsics, sizeof (vector4ui_intrinsics) / sizeof (SimdIntrinsc));
-       if (!strcmp ("Vector4i", cmethod->klass->name))
+       if (!strcmp ("Vector4i", class_name))
                return emit_intrinsics (cfg, cmethod, fsig, args, vector4i_intrinsics, sizeof (vector4i_intrinsics) / sizeof (SimdIntrinsc));
-       if (!strcmp ("Vector8us", cmethod->klass->name))
+       if (!strcmp ("Vector8us", class_name))
                return emit_intrinsics (cfg, cmethod, fsig, args, vector8us_intrinsics, sizeof (vector8us_intrinsics) / sizeof (SimdIntrinsc));
-       if (!strcmp ("Vector8s", cmethod->klass->name))
+       if (!strcmp ("Vector8s", class_name))
                return emit_intrinsics (cfg, cmethod, fsig, args, vector8s_intrinsics, sizeof (vector8s_intrinsics) / sizeof (SimdIntrinsc));
-       if (!strcmp ("Vector16b", cmethod->klass->name))
+       if (!strcmp ("Vector16b", class_name))
                return emit_intrinsics (cfg, cmethod, fsig, args, vector16b_intrinsics, sizeof (vector16b_intrinsics) / sizeof (SimdIntrinsc));
-       if (!strcmp ("Vector16sb", cmethod->klass->name))
+       if (!strcmp ("Vector16sb", class_name))
                return emit_intrinsics (cfg, cmethod, fsig, args, vector16sb_intrinsics, sizeof (vector16sb_intrinsics) / sizeof (SimdIntrinsc));
 
        return NULL;