X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mono%2Fmini%2Fsimd-intrinsics.c;h=d7ddc17f17a128337679b6b55a096a47beb1fba3;hb=0e93962a241c0e0567043a8d731cc6fdc36253bc;hp=c81619d7cc4688417de834b2d0b7c87813b8bdb9;hpb=6f5a0b555436eba699f9dd2659471044097b951b;p=mono.git diff --git a/mono/mini/simd-intrinsics.c b/mono/mini/simd-intrinsics.c index c81619d7cc4..d7ddc17f17a 100644 --- a/mono/mini/simd-intrinsics.c +++ b/mono/mini/simd-intrinsics.c @@ -127,7 +127,7 @@ typedef struct { } SimdIntrinsc; static const SimdIntrinsc vector4f_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, + { SN_ctor, OP_EXPAND_R4, SIMD_EMIT_CTOR }, { SN_AddSub, OP_ADDSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 }, { SN_AndNot, OP_ANDNPS, SIMD_EMIT_BINARY }, { SN_CompareEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ }, @@ -178,7 +178,7 @@ static const SimdIntrinsc vector4f_intrinsics[] = { }; static const SimdIntrinsc vector2d_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, + { SN_ctor, OP_EXPAND_R8, SIMD_EMIT_CTOR }, { SN_AddSub, OP_ADDSUBPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 }, { SN_AndNot, OP_ANDNPD, SIMD_EMIT_BINARY }, { SN_CompareEqual, OP_COMPPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ }, @@ -217,7 +217,7 @@ static const SimdIntrinsc vector2d_intrinsics[] = { }; static const SimdIntrinsc vector2ul_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, + { SN_ctor, OP_EXPAND_I8, SIMD_EMIT_CTOR }, { SN_CompareEqual, OP_PCMPEQQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 }, @@ -243,7 +243,7 @@ static const SimdIntrinsc vector2ul_intrinsics[] = { }; static const SimdIntrinsc vector2l_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, + { SN_ctor, OP_EXPAND_I8, SIMD_EMIT_CTOR }, { SN_CompareEqual, OP_PCMPEQQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, { SN_CompareGreaterThan, OP_PCMPGTQ, SIMD_EMIT_BINARY, 
SIMD_VERSION_SSE42 }, { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, @@ -270,7 +270,7 @@ static const SimdIntrinsc vector2l_intrinsics[] = { }; static const SimdIntrinsc vector4ui_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, + { SN_ctor, OP_EXPAND_I4, SIMD_EMIT_CTOR }, { SN_ArithmeticRightShift, OP_PSARD, SIMD_EMIT_SHIFT }, { SN_CompareEqual, OP_PCMPEQD, SIMD_EMIT_BINARY }, { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, @@ -308,7 +308,7 @@ static const SimdIntrinsc vector4ui_intrinsics[] = { }; static const SimdIntrinsc vector4i_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, + { SN_ctor, OP_EXPAND_I4, SIMD_EMIT_CTOR }, { SN_CompareEqual, OP_PCMPEQD, SIMD_EMIT_BINARY }, { SN_CompareGreaterThan, OP_PCMPGTD, SIMD_EMIT_BINARY }, { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, @@ -347,7 +347,7 @@ static const SimdIntrinsc vector4i_intrinsics[] = { }; static const SimdIntrinsc vector8us_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, + { SN_ctor, OP_EXPAND_I2, SIMD_EMIT_CTOR }, { SN_AddWithSaturation, OP_PADDW_SAT_UN, SIMD_EMIT_BINARY }, { SN_ArithmeticRightShift, OP_PSARW, SIMD_EMIT_SHIFT }, { SN_Average, OP_PAVGW_UN, SIMD_EMIT_BINARY }, @@ -398,7 +398,7 @@ static const SimdIntrinsc vector8us_intrinsics[] = { }; static const SimdIntrinsc vector8s_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, + { SN_ctor, OP_EXPAND_I2, SIMD_EMIT_CTOR }, { SN_AddWithSaturation, OP_PADDW_SAT, SIMD_EMIT_BINARY }, { SN_CompareEqual, OP_PCMPEQW, SIMD_EMIT_BINARY }, { SN_CompareGreaterThan, OP_PCMPGTW, SIMD_EMIT_BINARY }, @@ -449,7 +449,7 @@ static const SimdIntrinsc vector8s_intrinsics[] = { }; static const SimdIntrinsc vector16b_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, + { SN_ctor, OP_EXPAND_I1, SIMD_EMIT_CTOR }, { SN_AddWithSaturation, OP_PADDB_SAT_UN, SIMD_EMIT_BINARY }, { SN_Average, OP_PAVGB_UN, SIMD_EMIT_BINARY }, { SN_CompareEqual, OP_PCMPEQB, SIMD_EMIT_BINARY }, @@ -513,7 +513,7 @@ Missing: setters */ static const SimdIntrinsc vector16sb_intrinsics[] = { - { 
SN_ctor, 0, SIMD_EMIT_CTOR }, + { SN_ctor, OP_EXPAND_I1, SIMD_EMIT_CTOR }, { SN_AddWithSaturation, OP_PADDB_SAT, SIMD_EMIT_BINARY }, { SN_CompareEqual, OP_PCMPEQB, SIMD_EMIT_BINARY }, { SN_CompareGreaterThan, OP_PCMPGTB, SIMD_EMIT_BINARY }, @@ -676,6 +676,9 @@ mono_simd_simplify_indirection (MonoCompile *cfg) /*Scan the first basic block looking xzeros not used*/ for (ins = first_bb->code; ins; ins = ins->next) { + int num_sregs; + int sregs [MONO_MAX_SRC_REGS]; + if (ins->opcode == OP_XZERO) { if (!(vreg_flags [ins->dreg] & VREG_HAS_OTHER_OP_BB0)) { DEBUG (printf ("[simd-simplify] R%d has vzero: ", ins->dreg); mono_print_ins(ins)); @@ -685,13 +688,13 @@ mono_simd_simplify_indirection (MonoCompile *cfg) } if (ins->opcode == OP_LDADDR && apply_vreg_first_block_interference (cfg, ins, ((MonoInst*)ins->inst_p0)->dreg, max_vreg, vreg_flags)) continue; - if (apply_vreg_first_block_interference (cfg, ins, ins->dreg, max_vreg, vreg_flags)) continue; - if (apply_vreg_first_block_interference (cfg, ins, ins->sreg1, max_vreg, vreg_flags)) - continue; - if (apply_vreg_first_block_interference (cfg, ins, ins->sreg2, max_vreg, vreg_flags)) - continue; + num_sregs = mono_inst_get_src_registers (ins, sregs); + for (i = 0; i < num_sregs; ++i) { + if (apply_vreg_first_block_interference (cfg, ins, sregs [i], max_vreg, vreg_flags)) + break; + } } if (IS_DEBUG_ON (cfg)) { @@ -721,15 +724,19 @@ mono_simd_simplify_indirection (MonoCompile *cfg) for (bb = first_bb->next_bb; bb; bb = bb->next_bb) { for (ins = bb->code; ins; ins = ins->next) { - + int num_sregs; + int sregs [MONO_MAX_SRC_REGS]; + if (ins->opcode == OP_LDADDR && apply_vreg_following_block_interference (cfg, ins, ((MonoInst*)ins->inst_p0)->dreg, bb, max_vreg, vreg_flags, target_bb)) continue; if (apply_vreg_following_block_interference (cfg, ins, ins->dreg, bb, max_vreg, vreg_flags, target_bb)) continue; - if (apply_vreg_following_block_interference (cfg, ins, ins->sreg1, bb, max_vreg, vreg_flags, target_bb)) - continue; - 
if (apply_vreg_following_block_interference (cfg, ins, ins->sreg2, bb, max_vreg, vreg_flags, target_bb)) - continue; + num_sregs = mono_inst_get_src_registers (ins, sregs); + for (i = 0; i < num_sregs; ++i) { + if (apply_vreg_following_block_interference (cfg, ins, sregs [i], bb, + max_vreg, vreg_flags, target_bb)) + continue; + } } } @@ -745,10 +752,19 @@ mono_simd_simplify_indirection (MonoCompile *cfg) if (!(vreg_flags [var->dreg] & VREG_SINGLE_BB_USE)) continue; for (ins = target_bb [var->dreg]->code; ins; ins = ins->next) { + int num_sregs, j; + int sregs [MONO_MAX_SRC_REGS]; + gboolean found = FALSE; + + num_sregs = mono_inst_get_src_registers (ins, sregs); + for (j = 0; j < num_sregs; ++j) { + if (sregs [j] == var->dreg) /* index by j, the counter of this loop, so every source register of ins is examined */ + found = TRUE; + } /*We can avoid inserting the XZERO if the first use doesn't depend on the zero'ed value.*/ - if (ins->dreg == var->dreg && ins->sreg1 != var->dreg && ins->sreg2 != var->dreg) { + if (ins->dreg == var->dreg && !found) { break; - } else if (ins->sreg1 == var->dreg || ins->sreg2 == var->dreg) { + } else if (found) { MonoInst *tmp; MONO_INST_NEW (cfg, tmp, OP_XZERO); tmp->dreg = var->dreg; @@ -1065,6 +1081,38 @@ simd_intrinsic_emit_ctor (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoM int store_op = mono_type_to_store_membase (cfg, sig->params [0]); int arg_size = mono_type_size (sig->params [0], &i); + if (sig->param_count == 1) { + int dreg; + + if (is_ldaddr) { + dreg = args [0]->inst_i0->dreg; + NULLIFY_INS (args [0]); + } else { + g_assert (args [0]->type == STACK_MP || args [0]->type == STACK_PTR); + dreg = alloc_ireg (cfg); + } + + MONO_INST_NEW (cfg, ins, intrinsic->opcode); + ins->klass = cmethod->klass; + ins->sreg1 = args [1]->dreg; + ins->type = STACK_VTYPE; + ins->dreg = dreg; + + MONO_ADD_INS (cfg->cbb, ins); + if (sig->params [0]->type == MONO_TYPE_R4) + ins->backend.spill_var = get_int_to_float_spill_area (cfg); + else if (sig->params [0]->type == MONO_TYPE_R8) + ins->backend.spill_var = 
get_double_spill_area (cfg); + + if (!is_ldaddr) { + MONO_INST_NEW (cfg, ins, OP_STOREX_MEMBASE); + ins->dreg = args [0]->dreg; + ins->sreg1 = dreg; + MONO_ADD_INS (cfg->cbb, ins); + } + return ins; + } + if (is_ldaddr) { NEW_VARLOADA (cfg, ins, get_simd_ctor_spill_area (cfg, cmethod->klass), &cmethod->klass->byref_arg); MONO_ADD_INS (cfg->cbb, ins); @@ -1452,37 +1500,45 @@ emit_simd_runtime_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodS MonoInst* mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) { + const char *class_name; + if (strcmp ("Mono.Simd", cmethod->klass->name_space)) return NULL; - - if (!strcmp ("SimdRuntime", cmethod->klass->name)) + + class_name = cmethod->klass->name; + if (!strcmp ("SimdRuntime", class_name)) return emit_simd_runtime_intrinsics (cfg, cmethod, fsig, args); - if (!strcmp ("ArrayExtensions", cmethod->klass->name)) + if (!strcmp ("ArrayExtensions", class_name)) return emit_array_extension_intrinsics (cfg, cmethod, fsig, args); - if (!cmethod->klass->simd_type) + if (!strcmp ("VectorOperations", class_name)) { + if (!(cmethod->flags & METHOD_ATTRIBUTE_STATIC)) + return NULL; + class_name = mono_class_from_mono_type (mono_method_signature (cmethod)->params [0])->name; + } else if (!cmethod->klass->simd_type) return NULL; + cfg->uses_simd_intrinsics = 1; - if (!strcmp ("Vector2d", cmethod->klass->name)) + if (!strcmp ("Vector2d", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector2d_intrinsics, sizeof (vector2d_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector4f", cmethod->klass->name)) + if (!strcmp ("Vector4f", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector4f_intrinsics, sizeof (vector4f_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector2ul", cmethod->klass->name)) + if (!strcmp ("Vector2ul", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector2ul_intrinsics, sizeof 
(vector2ul_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector2l", cmethod->klass->name)) + if (!strcmp ("Vector2l", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector2l_intrinsics, sizeof (vector2l_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector4ui", cmethod->klass->name)) + if (!strcmp ("Vector4ui", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector4ui_intrinsics, sizeof (vector4ui_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector4i", cmethod->klass->name)) + if (!strcmp ("Vector4i", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector4i_intrinsics, sizeof (vector4i_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector8us", cmethod->klass->name)) + if (!strcmp ("Vector8us", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector8us_intrinsics, sizeof (vector8us_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector8s", cmethod->klass->name)) + if (!strcmp ("Vector8s", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector8s_intrinsics, sizeof (vector8s_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector16b", cmethod->klass->name)) + if (!strcmp ("Vector16b", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector16b_intrinsics, sizeof (vector16b_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector16sb", cmethod->klass->name)) + if (!strcmp ("Vector16sb", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector16sb_intrinsics, sizeof (vector16sb_intrinsics) / sizeof (SimdIntrinsc)); return NULL;