X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mono%2Fmini%2Fsimd-intrinsics.c;h=d55055f9f4763bf2833553eb5d40c3aceb338cce;hb=1575348f10b4d0908f4b3dd594f3202b32f82584;hp=0f484a98758ab2c15d81df374257df9e9eaabe59;hpb=aea57f09d430ae672e03d96729d447c814ec867c;p=mono.git diff --git a/mono/mini/simd-intrinsics.c b/mono/mini/simd-intrinsics.c index 0f484a98758..d55055f9f47 100644 --- a/mono/mini/simd-intrinsics.c +++ b/mono/mini/simd-intrinsics.c @@ -1,5 +1,6 @@ -/* - * simd-instrisics.c: simd support for intrinsics +/** + * \file + * simd support for intrinsics * * Author: * Rodrigo Kumpera (rkumpera@novell.com) @@ -14,6 +15,7 @@ #include "ir-emit.h" #include "mono/utils/bsearch.h" #include +#include /* General notes on SIMD intrinsics @@ -135,9 +137,9 @@ typedef struct { guint8 simd_version_flags; guint8 simd_emit_mode : 4; guint8 flags : 4; -} SimdIntrinsc; +} SimdIntrinsic; -static const SimdIntrinsc vector4f_intrinsics[] = { +static const SimdIntrinsic vector4f_intrinsics[] = { { SN_ctor, OP_EXPAND_R4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, { SN_AddSub, OP_ADDSUBPS, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY}, { SN_AndNot, OP_ANDNPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY}, @@ -191,7 +193,7 @@ static const SimdIntrinsc vector4f_intrinsics[] = { { SN_set_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER } }; -static const SimdIntrinsc vector2d_intrinsics[] = { +static const SimdIntrinsic vector2d_intrinsics[] = { { SN_ctor, OP_EXPAND_R8, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, { SN_AddSub, OP_ADDSUBPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY,}, { SN_AndNot, OP_ANDNPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, @@ -235,7 +237,7 @@ static const SimdIntrinsc vector2d_intrinsics[] = { { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; -static const SimdIntrinsc vector2ul_intrinsics[] = { +static const SimdIntrinsic vector2ul_intrinsics[] = { { SN_ctor, OP_EXPAND_I8, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, { SN_CompareEqual, OP_PCMPEQQ, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, { 
SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED }, @@ -262,7 +264,7 @@ static const SimdIntrinsc vector2ul_intrinsics[] = { { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; -static const SimdIntrinsc vector2l_intrinsics[] = { +static const SimdIntrinsic vector2l_intrinsics[] = { { SN_ctor, OP_EXPAND_I8, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, { SN_CompareEqual, OP_PCMPEQQ, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, { SN_CompareGreaterThan, OP_PCMPGTQ, SIMD_VERSION_SSE42, SIMD_EMIT_BINARY }, @@ -290,7 +292,7 @@ static const SimdIntrinsc vector2l_intrinsics[] = { { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; -static const SimdIntrinsc vector4ui_intrinsics[] = { +static const SimdIntrinsic vector4ui_intrinsics[] = { { SN_ctor, OP_EXPAND_I4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, { SN_ArithmeticRightShift, OP_PSARD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, { SN_CompareEqual, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, @@ -328,7 +330,7 @@ static const SimdIntrinsc vector4ui_intrinsics[] = { { SN_set_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; -static const SimdIntrinsc vector4i_intrinsics[] = { +static const SimdIntrinsic vector4i_intrinsics[] = { { SN_ctor, OP_EXPAND_I4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, { SN_CompareEqual, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, { SN_CompareGreaterThan, OP_PCMPGTD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, @@ -369,7 +371,7 @@ static const SimdIntrinsc vector4i_intrinsics[] = { { SN_set_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; -static const SimdIntrinsc vector8us_intrinsics[] = { +static const SimdIntrinsic vector8us_intrinsics[] = { { SN_ctor, OP_EXPAND_I2, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, { SN_AddWithSaturation, OP_PADDW_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, { SN_ArithmeticRightShift, OP_PSARW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, @@ -420,7 +422,7 @@ static const SimdIntrinsc vector8us_intrinsics[] = { { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; -static 
const SimdIntrinsc vector8s_intrinsics[] = { +static const SimdIntrinsic vector8s_intrinsics[] = { { SN_ctor, OP_EXPAND_I2, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, { SN_AddWithSaturation, OP_PADDW_SAT, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, { SN_CompareEqual, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, @@ -471,7 +473,7 @@ static const SimdIntrinsc vector8s_intrinsics[] = { { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; -static const SimdIntrinsc vector16b_intrinsics[] = { +static const SimdIntrinsic vector16b_intrinsics[] = { { SN_ctor, OP_EXPAND_I1, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, { SN_AddWithSaturation, OP_PADDB_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, { SN_Average, OP_PAVGB_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, @@ -535,7 +537,7 @@ static const SimdIntrinsc vector16b_intrinsics[] = { Missing: setters */ -static const SimdIntrinsc vector16sb_intrinsics[] = { +static const SimdIntrinsic vector16sb_intrinsics[] = { { SN_ctor, OP_EXPAND_I1, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, { SN_AddWithSaturation, OP_PADDB_SAT, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, { SN_CompareEqual, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, @@ -596,11 +598,14 @@ static const SimdIntrinsc vector16sb_intrinsics[] = { static guint32 simd_supported_versions; +static MonoInst* emit_sys_numerics_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args); +static MonoInst* emit_sys_numerics_vectors_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args); + /*TODO match using number of parameters as well*/ static int simd_intrinsic_compare_by_name (const void *key, const void *value) { - return strcmp (key, method_name (((SimdIntrinsc *)value)->name)); + return strcmp (key, method_name (((SimdIntrinsic *)value)->name)); } typedef enum { @@ -686,7 +691,7 @@ mono_simd_simplify_indirection (MonoCompile *cfg) } DEBUG (printf ("[simd-simplify] max vreg is %d\n", max_vreg)); - vreg_flags = 
g_malloc0 (max_vreg + 1); + vreg_flags = (char *)g_malloc0 (max_vreg + 1); target_bb = g_new0 (MonoBasicBlock*, max_vreg + 1); for (i = 0; i < cfg->num_varinfo; i++) { @@ -818,11 +823,16 @@ mono_simd_simplify_indirection (MonoCompile *cfg) static int get_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src) { + const char *spec = INS_INFO (src->opcode); + if (src->opcode == OP_XMOVE) { return src->sreg1; - } else if (src->type == STACK_VTYPE) { + } else if (spec [MONO_INST_DEST] == 'x') { + return src->dreg; + } else if (src->opcode == OP_VCALL || src->opcode == OP_VCALL_MEMBASE) { return src->dreg; } + g_warning ("get_simd_vreg:: could not infer source simd vreg for op"); mono_print_ins (src); g_assert_not_reached (); @@ -832,8 +842,10 @@ get_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src) * This function will load the value if needed. */ static int -load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean *indirect) +load_simd_vreg_class (MonoCompile *cfg, MonoClass *klass, MonoInst *src, gboolean *indirect) { + const char *spec = INS_INFO (src->opcode); + if (indirect) *indirect = FALSE; if (src->opcode == OP_XMOVE) { @@ -842,7 +854,7 @@ load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean * int res = ((MonoInst*)src->inst_p0)->dreg; NULLIFY_INS (src); return res; - } else if (src->type == STACK_VTYPE) { + } else if (spec [MONO_INST_DEST] == 'x') { return src->dreg; } else if (src->type == STACK_PTR || src->type == STACK_MP) { MonoInst *ins; @@ -850,7 +862,7 @@ load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean * *indirect = TRUE; MONO_INST_NEW (cfg, ins, OP_LOADX_MEMBASE); - ins->klass = cmethod->klass; + ins->klass = klass; ins->sreg1 = src->dreg; ins->type = STACK_VTYPE; ins->dreg = alloc_ireg (cfg); @@ -862,6 +874,12 @@ load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean * g_assert_not_reached (); } +static int +load_simd_vreg 
(MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean *indirect) +{ + return load_simd_vreg_class (cfg, cmethod->klass, src, indirect); +} + /*We share the var with fconv_to_r8_x to save some stack space.*/ static MonoInst* get_double_spill_area (MonoCompile *cfg) @@ -908,21 +926,244 @@ mono_type_to_expand_op (MonoType *type) } static int -get_simd_vreg_or_expanded_scalar (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, int position) +type_to_comp_op (MonoType *t) +{ + switch (t->type) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_PCMPEQB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_PCMPEQW; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_PCMPEQD; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return OP_PCMPEQQ; + case MONO_TYPE_R4: + return OP_COMPPS; + case MONO_TYPE_R8: + return OP_COMPPD; + default: + g_assert_not_reached (); + return -1; + } +} + +static int +type_to_gt_op (MonoType *t) +{ + switch (t->type) { + case MONO_TYPE_I1: + return OP_PCMPGTB; + case MONO_TYPE_I2: + return OP_PCMPGTW; + case MONO_TYPE_I4: + return OP_PCMPGTD; + case MONO_TYPE_I8: + return OP_PCMPGTQ; + default: + return -1; + } +} + +static int +type_to_padd_op (MonoType *t) +{ + switch (t->type) { + case MONO_TYPE_U1: + case MONO_TYPE_I1: + return OP_PADDB; + case MONO_TYPE_U2: + case MONO_TYPE_I2: + return OP_PADDW; + case MONO_TYPE_U4: + case MONO_TYPE_I4: + return OP_PADDD; + case MONO_TYPE_U8: + case MONO_TYPE_I8: + return OP_PADDQ; + case MONO_TYPE_R4: + return OP_ADDPS; + case MONO_TYPE_R8: + return OP_ADDPD; + default: + break; + } + return -1; +} + +static int +type_to_psub_op (MonoType *t) +{ + switch (t->type) { + case MONO_TYPE_U1: + case MONO_TYPE_I1: + return OP_PSUBB; + case MONO_TYPE_U2: + case MONO_TYPE_I2: + return OP_PSUBW; + case MONO_TYPE_U4: + case MONO_TYPE_I4: + return OP_PSUBD; + case MONO_TYPE_U8: + case MONO_TYPE_I8: + return OP_PSUBQ; + case MONO_TYPE_R4: + return OP_SUBPS; + case MONO_TYPE_R8: + return OP_SUBPD; + 
default: + break; + } + return -1; +} + +static int +type_to_pmul_op (MonoType *t) +{ + switch (t->type) { + case MONO_TYPE_U2: + case MONO_TYPE_I2: + return OP_PMULW; + case MONO_TYPE_U4: + case MONO_TYPE_I4: + return OP_PMULD; + case MONO_TYPE_R4: + return OP_MULPS; + case MONO_TYPE_R8: + return OP_MULPD; + case MONO_TYPE_U8: + /* PMULQ multiplies two 32 bit numbers into a 64 bit one */ + return -1; + case MONO_TYPE_I8: + return -1; + default: + break; + } + return -1; +} + +static int +type_to_pdiv_op (MonoType *t) +{ + switch (t->type) { + case MONO_TYPE_R4: + return OP_DIVPS; + case MONO_TYPE_R8: + return OP_DIVPD; + default: + break; + } + return -1; +} + +static int +type_to_pxor_op (MonoType *t) +{ + /* + * These opcodes have the same semantics, but using the + * correctly typed version is better for performance. + */ + switch (t->type) { + case MONO_TYPE_R4: + return OP_XORPS; + case MONO_TYPE_R8: + return OP_XORPD; + default: + return OP_PXOR; + } +} + +static int +type_to_pand_op (MonoType *t) +{ + switch (t->type) { + case MONO_TYPE_R4: + return OP_ANDPS; + case MONO_TYPE_R8: + return OP_ANDPD; + default: + return OP_PAND; + } +} + +static int +type_to_por_op (MonoType *t) +{ + switch (t->type) { + case MONO_TYPE_R4: + return OP_ORPS; + case MONO_TYPE_R8: + return OP_ORPD; + default: + return OP_POR; + } +} + +static int +type_to_pmin_op (MonoType *t) +{ + switch (t->type) { + case MONO_TYPE_R4: + return OP_MINPS; + case MONO_TYPE_R8: + return OP_MINPD; + case MONO_TYPE_I1: + return OP_PMINB; + case MONO_TYPE_U1: + return OP_PMINB_UN; + case MONO_TYPE_I2: + return OP_PMINW; + case MONO_TYPE_U2: + return OP_PMINW_UN; + case MONO_TYPE_I4: + return OP_PMIND; + case MONO_TYPE_U4: + return OP_PMIND_UN; + default: + return -1; + } +} + +static int +type_to_pmax_op (MonoType *t) +{ + switch (t->type) { + case MONO_TYPE_R4: + return OP_MAXPS; + case MONO_TYPE_R8: + return OP_MAXPD; + case MONO_TYPE_I1: + return OP_PMAXB; + case MONO_TYPE_U1: + return 
OP_PMAXB_UN; + case MONO_TYPE_I2: + return OP_PMAXW; + case MONO_TYPE_U2: + return OP_PMAXW_UN; + case MONO_TYPE_I4: + return OP_PMAXD; + case MONO_TYPE_U4: + return OP_PMAXD_UN; + default: + return -1; + } +} + +static int +get_simd_vreg_or_expanded_scalar (MonoCompile *cfg, MonoClass *klass, MonoType *param_type, MonoInst *src) { MonoInst *ins; - MonoMethodSignature *sig = mono_method_signature (cmethod); int expand_op; - g_assert (sig->param_count == 2); - g_assert (position == 0 || position == 1); - - if (mono_class_from_mono_type (sig->params [position])->simd_type) - return get_simd_vreg (cfg, cmethod, src); + if (mono_class_from_mono_type (param_type)->simd_type) + return get_simd_vreg (cfg, NULL, src); - expand_op = mono_type_to_expand_op (sig->params [position]); + expand_op = mono_type_to_expand_op (param_type); MONO_INST_NEW (cfg, ins, expand_op); - ins->klass = cmethod->klass; + ins->klass = klass; ins->sreg1 = src->dreg; ins->type = STACK_VTYPE; ins->dreg = alloc_ireg (cfg); @@ -936,29 +1177,45 @@ get_simd_vreg_or_expanded_scalar (MonoCompile *cfg, MonoMethod *cmethod, MonoIns return ins->dreg; } +/* + * simd_intrinsic_emit_binary_op: + * + * Emit a binary SIMD opcode. + * @LHS/@RHS are the two arguments, they can be either a SIMD type or a scalar one. Scalar arguments are + * expanded to the SIMD type. 
+ */ static MonoInst* -simd_intrinsic_emit_binary (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +simd_intrinsic_emit_binary_op (MonoCompile *cfg, int opcode, int flags, MonoClass *klass, MonoType *lhs_type, MonoType *rhs_type, MonoInst *lhs, MonoInst *rhs) { MonoInst* ins; int left_vreg, right_vreg; - left_vreg = get_simd_vreg_or_expanded_scalar (cfg, cmethod, args [0], 0); - right_vreg = get_simd_vreg_or_expanded_scalar (cfg, cmethod, args [1], 1); - + left_vreg = get_simd_vreg_or_expanded_scalar (cfg, klass, lhs_type, lhs); + right_vreg = get_simd_vreg_or_expanded_scalar (cfg, klass, rhs_type, rhs); - MONO_INST_NEW (cfg, ins, intrinsic->opcode); - ins->klass = cmethod->klass; + MONO_INST_NEW (cfg, ins, opcode); + ins->klass = klass; ins->sreg1 = left_vreg; ins->sreg2 = right_vreg; ins->type = STACK_VTYPE; ins->dreg = alloc_ireg (cfg); - ins->inst_c0 = intrinsic->flags; + ins->inst_c0 = flags; MONO_ADD_INS (cfg->cbb, ins); return ins; } static MonoInst* -simd_intrinsic_emit_unary (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +simd_intrinsic_emit_binary (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +{ + MonoMethodSignature *sig = mono_method_signature (cmethod); + + g_assert (sig->param_count == 2); + + return simd_intrinsic_emit_binary_op (cfg, intrinsic->opcode, intrinsic->flags, cmethod->klass, sig->params [0], sig->params [1], args [0], args [1]); +} + +static MonoInst* +simd_intrinsic_emit_unary (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) { MonoInst* ins; int vreg; @@ -1066,7 +1323,7 @@ mono_type_to_slow_insert_op (MonoType *type) } static MonoInst* -simd_intrinsic_emit_setter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +simd_intrinsic_emit_setter (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) { MonoInst 
*ins; MonoMethodSignature *sig = mono_method_signature (cmethod); @@ -1125,37 +1382,65 @@ simd_intrinsic_emit_setter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mon return ins; } +/* + * simd_intrinsic_emit_getter_op: + * + * Emit IR for loading an element of a SIMD value. + * + * @klass is the simd type, @type is the element type. + */ static MonoInst* -simd_intrinsic_emit_getter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +simd_intrinsic_emit_getter_op (MonoCompile *cfg, int index, MonoClass *klass, MonoType *type, MonoInst *arg) { MonoInst *ins; - MonoMethodSignature *sig = mono_method_signature (cmethod); - int vreg, shift_bits = mono_type_elements_shift_bits (sig->ret); + int vreg, shift_bits; - vreg = load_simd_vreg (cfg, cmethod, args [0], NULL); + vreg = load_simd_vreg_class (cfg, klass, arg, NULL); + + if (type->type == MONO_TYPE_I8 || type->type == MONO_TYPE_U8 || type->type == MONO_TYPE_R8) { + MonoInst *ins; + gboolean is_r8 = type->type == MONO_TYPE_R8; + + MONO_INST_NEW (cfg, ins, is_r8 ? 
OP_EXTRACT_R8 : OP_EXTRACT_I8); + ins->klass = klass; + ins->sreg1 = vreg; + ins->inst_c0 = index; + if (is_r8) { + ins->type = STACK_R8; + ins->dreg = alloc_freg (cfg); + ins->backend.spill_var = get_double_spill_area (cfg); + } else { + ins->type = STACK_I8; + ins->dreg = alloc_lreg (cfg); + } + MONO_ADD_INS (cfg->cbb, ins); + return ins; + } + + shift_bits = mono_type_elements_shift_bits (type); - if ((intrinsic->opcode >> shift_bits) && !cfg->compile_llvm) { + if ((index >> shift_bits) && !cfg->compile_llvm) { MONO_INST_NEW (cfg, ins, OP_PSHUFLED); - ins->klass = cmethod->klass; + ins->klass = klass; ins->sreg1 = vreg; - ins->inst_c0 = intrinsic->opcode >> shift_bits; + ins->inst_c0 = index >> shift_bits; ins->type = STACK_VTYPE; ins->dreg = vreg = alloc_ireg (cfg); MONO_ADD_INS (cfg->cbb, ins); } - MONO_INST_NEW (cfg, ins, mono_type_to_extract_op (sig->ret)); - ins->klass = cmethod->klass; + MONO_INST_NEW (cfg, ins, mono_type_to_extract_op (type)); + ins->klass = klass; ins->sreg1 = vreg; ins->type = STACK_I4; ins->dreg = vreg = alloc_ireg (cfg); if (cfg->compile_llvm) - ins->inst_c0 = intrinsic->opcode; + ins->inst_c0 = index; else - ins->inst_c0 = intrinsic->opcode & ((1 << shift_bits) - 1); + ins->inst_c0 = index & ((1 << shift_bits) - 1); MONO_ADD_INS (cfg->cbb, ins); - if (sig->ret->type == MONO_TYPE_R4) { + if (type->type == MONO_TYPE_R4) { MONO_INST_NEW (cfg, ins, cfg->r4fp ? 
OP_ICONV_TO_R4_RAW : OP_MOVE_I4_TO_F); ins->klass = mono_defaults.single_class; ins->sreg1 = vreg; @@ -1168,7 +1453,15 @@ simd_intrinsic_emit_getter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mon } static MonoInst* -simd_intrinsic_emit_long_getter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +simd_intrinsic_emit_getter (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +{ + MonoMethodSignature *sig = mono_method_signature (cmethod); + + return simd_intrinsic_emit_getter_op (cfg, intrinsic->opcode, cmethod->klass, sig->ret, args [0]); +} + +static MonoInst* +simd_intrinsic_emit_long_getter (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) { MonoInst *ins; int vreg; @@ -1194,7 +1487,7 @@ simd_intrinsic_emit_long_getter (const SimdIntrinsc *intrinsic, MonoCompile *cfg } static MonoInst* -simd_intrinsic_emit_ctor (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +simd_intrinsic_emit_ctor (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) { MonoInst *ins = NULL; int i, addr_reg; @@ -1202,6 +1495,7 @@ simd_intrinsic_emit_ctor (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoM MonoMethodSignature *sig = mono_method_signature (cmethod); int store_op = mono_type_to_store_membase (cfg, sig->params [0]); int arg_size = mono_type_size (sig->params [0], &i); + int opcode; if (sig->param_count == 1) { int dreg; @@ -1214,7 +1508,11 @@ simd_intrinsic_emit_ctor (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoM dreg = alloc_ireg (cfg); } - MONO_INST_NEW (cfg, ins, intrinsic->opcode); + if (intrinsic) + opcode = intrinsic->opcode; + else + opcode = mono_type_to_expand_op (sig->params [0]); + MONO_INST_NEW (cfg, ins, opcode); ins->klass = cmethod->klass; ins->sreg1 = args [1]->dreg; ins->type = STACK_VTYPE; @@ -1248,6 +1546,20 @@ simd_intrinsic_emit_ctor (const SimdIntrinsc 
*intrinsic, MonoCompile *cfg, MonoM EMIT_NEW_STORE_MEMBASE (cfg, ins, store_op, addr_reg, i * arg_size, args [i + 1]->dreg); } + if (sig->param_count * arg_size < 16) { + /* If there are not enough arguments, fill the rest with 0s */ + for (i = sig->param_count; i < 16 / arg_size; ++i) { + switch (arg_size) { + case 4: + MONO_EMIT_NEW_STORE_MEMBASE_IMM (cfg, OP_STOREI4_MEMBASE_IMM, addr_reg, i * arg_size, 0); + break; + default: + g_assert_not_reached (); + break; + } + } + } + if (is_ldaddr) { /*Eliminate LDADDR if it's initing a local var*/ int vreg = ((MonoInst*)args [0]->inst_p0)->dreg; NULLIFY_INS (args [0]); @@ -1263,16 +1575,22 @@ simd_intrinsic_emit_ctor (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoM } static MonoInst* -simd_intrinsic_emit_cast (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +simd_intrinsic_emit_cast (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) { MonoInst *ins; + MonoClass *klass; int vreg; vreg = get_simd_vreg (cfg, cmethod, args [0]); - //TODO macroize this + if (cmethod->is_inflated) + /* Vector */ + klass = mono_class_from_mono_type (mono_method_signature (cmethod)->ret); + else + klass = cmethod->klass; + MONO_INST_NEW (cfg, ins, OP_XMOVE); - ins->klass = cmethod->klass; + ins->klass = klass; ins->type = STACK_VTYPE; ins->sreg1 = vreg; ins->dreg = alloc_ireg (cfg); @@ -1281,7 +1599,7 @@ simd_intrinsic_emit_cast (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoM } static MonoInst* -simd_intrinsic_emit_shift (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +simd_intrinsic_emit_shift (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) { MonoInst *ins; int vreg, vreg2 = -1, opcode = intrinsic->opcode; @@ -1322,23 +1640,22 @@ mono_op_is_packed_compare (int op) } static MonoInst* -simd_intrinsic_emit_equality (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod 
*cmethod, MonoInst **args) +simd_intrinsic_emit_equality_op (MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args, int opcode, int flags) { MonoInst* ins; int left_vreg, right_vreg, tmp_vreg; - left_vreg = get_simd_vreg (cfg, cmethod, args [0]); + left_vreg = load_simd_vreg (cfg, cmethod, args [0], NULL); right_vreg = get_simd_vreg (cfg, cmethod, args [1]); - - MONO_INST_NEW (cfg, ins, intrinsic->opcode); + MONO_INST_NEW (cfg, ins, opcode); ins->klass = cmethod->klass; ins->sreg1 = left_vreg; ins->sreg2 = right_vreg; ins->type = STACK_VTYPE; ins->klass = cmethod->klass; ins->dreg = tmp_vreg = alloc_ireg (cfg); - ins->inst_c0 = intrinsic->flags; + ins->inst_c0 = flags; MONO_ADD_INS (cfg->cbb, ins); /*FIXME the next ops are SSE specific*/ @@ -1350,9 +1667,9 @@ simd_intrinsic_emit_equality (const SimdIntrinsc *intrinsic, MonoCompile *cfg, M MONO_ADD_INS (cfg->cbb, ins); /*FP ops have a not equal instruction, which means that we must test the results with OR semantics.*/ - if (mono_op_is_packed_compare (intrinsic->opcode) || intrinsic->flags == SIMD_COMP_EQ) { + if (mono_op_is_packed_compare (opcode) || flags == SIMD_COMP_EQ) { MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, tmp_vreg, 0xFFFF); - NEW_UNALU (cfg, ins, intrinsic->flags == SIMD_COMP_EQ ? OP_CEQ : OP_CLT_UN, tmp_vreg, -1); + NEW_UNALU (cfg, ins, flags == SIMD_COMP_EQ ? 
OP_CEQ : OP_CLT_UN, tmp_vreg, -1); } else { MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, tmp_vreg, 0); NEW_UNALU (cfg, ins, OP_CGT_UN, tmp_vreg, -1); @@ -1361,9 +1678,14 @@ simd_intrinsic_emit_equality (const SimdIntrinsc *intrinsic, MonoCompile *cfg, M return ins; } +static MonoInst* +simd_intrinsic_emit_equality (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +{ + return simd_intrinsic_emit_equality_op (cfg, cmethod, args, intrinsic->opcode, intrinsic->flags); +} static MonoInst* -simd_intrinsic_emit_shuffle (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +simd_intrinsic_emit_shuffle (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) { MonoInst *ins; int vreg, vreg2 = -1; @@ -1396,7 +1718,7 @@ simd_intrinsic_emit_shuffle (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mo } static MonoInst* -simd_intrinsic_emit_load_aligned (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +simd_intrinsic_emit_load_aligned (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) { MonoInst *ins; @@ -1410,7 +1732,7 @@ simd_intrinsic_emit_load_aligned (const SimdIntrinsc *intrinsic, MonoCompile *cf } static MonoInst* -simd_intrinsic_emit_store (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +simd_intrinsic_emit_store (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) { MonoInst *ins; int vreg; @@ -1427,7 +1749,7 @@ simd_intrinsic_emit_store (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mono } static MonoInst* -simd_intrinsic_emit_extract_mask (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +simd_intrinsic_emit_extract_mask (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) { MonoInst *ins; int vreg; @@ -1445,7 +1767,7 @@ 
simd_intrinsic_emit_extract_mask (const SimdIntrinsc *intrinsic, MonoCompile *cf } static MonoInst* -simd_intrinsic_emit_prefetch (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +simd_intrinsic_emit_prefetch (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) { MonoInst *ins; @@ -1457,6 +1779,19 @@ simd_intrinsic_emit_prefetch (const SimdIntrinsc *intrinsic, MonoCompile *cfg, M return ins; } +static MonoInst* +simd_intrinsic_emit_const (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) +{ + MonoInst *ins; + + MONO_INST_NEW (cfg, ins, intrinsic->opcode); + ins->klass = cmethod->klass; + ins->type = STACK_VTYPE; + ins->dreg = alloc_xreg (cfg); + MONO_ADD_INS (cfg->cbb, ins); + return ins; +} + static const char * simd_version_name (guint32 version) { @@ -1480,9 +1815,9 @@ simd_version_name (guint32 version) } static MonoInst* -emit_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args, const SimdIntrinsc *intrinsics, guint32 size) +emit_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args, const SimdIntrinsic *intrinsics, guint32 size) { - const SimdIntrinsc * result = mono_binary_search (cmethod->name, intrinsics, size, sizeof (SimdIntrinsc), &simd_intrinsic_compare_by_name); + const SimdIntrinsic *result = (const SimdIntrinsic *)mono_binary_search (cmethod->name, intrinsics, size, sizeof (SimdIntrinsic), &simd_intrinsic_compare_by_name); if (!result) { DEBUG (printf ("function doesn't have a simd intrinsic %s::%s/%d\n", cmethod->klass->name, cmethod->name, fsig->param_count)); return NULL; @@ -1634,11 +1969,29 @@ emit_simd_runtime_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodS return NULL; } +static gboolean +is_sys_numerics_assembly (MonoAssembly *assembly) +{ + return !strcmp ("System.Numerics", assembly->aname.name); +} + +static gboolean 
+is_sys_numerics_vectors_assembly (MonoAssembly *assembly) +{ + return !strcmp ("System.Numerics.Vectors", assembly->aname.name); +} + MonoInst* mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) { const char *class_name; + if (is_sys_numerics_assembly (cmethod->klass->image->assembly)) + return emit_sys_numerics_intrinsics (cfg, cmethod, fsig, args); + + if (is_sys_numerics_vectors_assembly (cmethod->klass->image->assembly)) + return emit_sys_numerics_vectors_intrinsics (cfg, cmethod, fsig, args); + if (strcmp ("Mono.Simd", cmethod->klass->image->assembly->aname.name) || strcmp ("Mono.Simd", cmethod->klass->name_space)) return NULL; @@ -1659,28 +2012,516 @@ mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign cfg->uses_simd_intrinsics = 1; if (!strcmp ("Vector2d", class_name)) - return emit_intrinsics (cfg, cmethod, fsig, args, vector2d_intrinsics, sizeof (vector2d_intrinsics) / sizeof (SimdIntrinsc)); + return emit_intrinsics (cfg, cmethod, fsig, args, vector2d_intrinsics, sizeof (vector2d_intrinsics) / sizeof (SimdIntrinsic)); if (!strcmp ("Vector4f", class_name)) - return emit_intrinsics (cfg, cmethod, fsig, args, vector4f_intrinsics, sizeof (vector4f_intrinsics) / sizeof (SimdIntrinsc)); + return emit_intrinsics (cfg, cmethod, fsig, args, vector4f_intrinsics, sizeof (vector4f_intrinsics) / sizeof (SimdIntrinsic)); if (!strcmp ("Vector2ul", class_name)) - return emit_intrinsics (cfg, cmethod, fsig, args, vector2ul_intrinsics, sizeof (vector2ul_intrinsics) / sizeof (SimdIntrinsc)); + return emit_intrinsics (cfg, cmethod, fsig, args, vector2ul_intrinsics, sizeof (vector2ul_intrinsics) / sizeof (SimdIntrinsic)); if (!strcmp ("Vector2l", class_name)) - return emit_intrinsics (cfg, cmethod, fsig, args, vector2l_intrinsics, sizeof (vector2l_intrinsics) / sizeof (SimdIntrinsc)); + return emit_intrinsics (cfg, cmethod, fsig, args, vector2l_intrinsics, sizeof (vector2l_intrinsics) / 
sizeof (SimdIntrinsic)); if (!strcmp ("Vector4ui", class_name)) - return emit_intrinsics (cfg, cmethod, fsig, args, vector4ui_intrinsics, sizeof (vector4ui_intrinsics) / sizeof (SimdIntrinsc)); + return emit_intrinsics (cfg, cmethod, fsig, args, vector4ui_intrinsics, sizeof (vector4ui_intrinsics) / sizeof (SimdIntrinsic)); if (!strcmp ("Vector4i", class_name)) - return emit_intrinsics (cfg, cmethod, fsig, args, vector4i_intrinsics, sizeof (vector4i_intrinsics) / sizeof (SimdIntrinsc)); + return emit_intrinsics (cfg, cmethod, fsig, args, vector4i_intrinsics, sizeof (vector4i_intrinsics) / sizeof (SimdIntrinsic)); if (!strcmp ("Vector8us", class_name)) - return emit_intrinsics (cfg, cmethod, fsig, args, vector8us_intrinsics, sizeof (vector8us_intrinsics) / sizeof (SimdIntrinsc)); + return emit_intrinsics (cfg, cmethod, fsig, args, vector8us_intrinsics, sizeof (vector8us_intrinsics) / sizeof (SimdIntrinsic)); if (!strcmp ("Vector8s", class_name)) - return emit_intrinsics (cfg, cmethod, fsig, args, vector8s_intrinsics, sizeof (vector8s_intrinsics) / sizeof (SimdIntrinsc)); + return emit_intrinsics (cfg, cmethod, fsig, args, vector8s_intrinsics, sizeof (vector8s_intrinsics) / sizeof (SimdIntrinsic)); if (!strcmp ("Vector16b", class_name)) - return emit_intrinsics (cfg, cmethod, fsig, args, vector16b_intrinsics, sizeof (vector16b_intrinsics) / sizeof (SimdIntrinsc)); + return emit_intrinsics (cfg, cmethod, fsig, args, vector16b_intrinsics, sizeof (vector16b_intrinsics) / sizeof (SimdIntrinsic)); if (!strcmp ("Vector16sb", class_name)) - return emit_intrinsics (cfg, cmethod, fsig, args, vector16sb_intrinsics, sizeof (vector16sb_intrinsics) / sizeof (SimdIntrinsc)); + return emit_intrinsics (cfg, cmethod, fsig, args, vector16sb_intrinsics, sizeof (vector16sb_intrinsics) / sizeof (SimdIntrinsic)); + + return NULL; +} + /* assert_handled: diagnostic helper. When cfg->verbose_level > 1, prints a notice if METHOD carries a custom attribute whose constructor class is named JitIntrinsicAttribute; the callers below invoke it on the paths where no SIMD intrinsic was emitted. Does nothing at lower verbosity. */ +static void +assert_handled (MonoCompile *cfg, MonoMethod *method) +{ + MonoCustomAttrInfo *cattr; + MonoError error; + + if (cfg->verbose_level > 1) { +
cattr = mono_custom_attrs_from_method_checked (method, &error); /* NOTE(review): error is never inspected or cleaned up after this call - confirm this is intended */ + + if (cattr) { + gboolean has_attr = FALSE; + for (int i = 0; i < cattr->num_attrs; ++i) + if (cattr->attrs [i].ctor && (!strcmp (cattr->attrs [i].ctor->klass->name, "JitIntrinsicAttribute"))) + has_attr = TRUE; + if (has_attr) { + /* NOTE(review): the string returned by mono_method_get_name_full is not freed here - confirm ownership */ printf ("SIMD intrinsic unhandled: %s\n", mono_method_get_name_full (method, TRUE, TRUE, MONO_TYPE_NAME_FORMAT_IL)); + fflush (stdout); + //g_assert_not_reached (); + } + mono_custom_attrs_free (cattr); + } + } +} + +// The entries should be ordered by name +// System.Numerics.Vector2/Vector3/Vector4 +static const SimdIntrinsic vector2_intrinsics[] = { + { SN_ctor, OP_EXPAND_R4 }, + { SN_Abs }, + { SN_Dot, OP_DPPS }, + { SN_Equals, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ }, + { SN_Max, OP_MAXPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_Min, OP_MINPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_SquareRoot, OP_SQRTPS, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY }, + { SN_op_Addition, OP_ADDPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Division, OP_DIVPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Multiply, OP_MULPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Subtraction, OP_SUBPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, +}; + /* emit_vector_intrinsics: looks cmethod->name up in vector2_intrinsics (binary search, hence the ordering requirement on the table) and, when the signature has the expected shape, emits the matching SIMD instruction(s). Returns NULL when the method is not handled. */ +static MonoInst* +emit_vector_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) +{ + const SimdIntrinsic *intrins; + MonoMethodSignature *sig = mono_method_signature (cmethod); + MonoType *type = &cmethod->klass->byval_arg; + + /* + * Vector2/3/4 are handled the same way, since the underlying SIMD type is the same (4 * r4).
+ */ + intrins = (const SimdIntrinsic*)mono_binary_search (cmethod->name, vector2_intrinsics, sizeof (vector2_intrinsics) / sizeof (SimdIntrinsic), sizeof (SimdIntrinsic), &simd_intrinsic_compare_by_name); + if (!intrins) { + assert_handled (cfg, cmethod); + return NULL; + } + + if (cfg->verbose_level > 1) { + char *name = mono_method_full_name (cmethod, TRUE); + printf (" SIMD intrinsic %s\n", name); + g_free (name); + } + + switch (intrins->name) { + case SN_ctor: { + /* only constructors whose parameters are all R4 are handled */ gboolean match = TRUE; + for (int i = 0; i < fsig->param_count; ++i) + if (fsig->params [i]->type != MONO_TYPE_R4) + match = FALSE; + if (!match) + break; + return simd_intrinsic_emit_ctor (intrins, cfg, cmethod, args); + } + case SN_Equals: + if (!(fsig->param_count == 1 && fsig->ret->type == MONO_TYPE_BOOLEAN && fsig->params [0] == type)) + break; + return simd_intrinsic_emit_equality (intrins, cfg, cmethod, args); + case SN_SquareRoot: + if (!(fsig->param_count == 1 && fsig->ret == type && fsig->params [0] == type)) + break; + return simd_intrinsic_emit_unary (intrins, cfg, cmethod, args); + case SN_Dot: + if (!(fsig->param_count == 2 && fsig->ret->type == MONO_TYPE_R4 && fsig->params [0] == type && fsig->params [1] == type)) + break; + /* only emitted when COMPILE_LLVM (cfg) holds; otherwise this case breaks out and the method is reported as not handled */ if (COMPILE_LLVM (cfg)) { + MonoInst *ins; + + ins = simd_intrinsic_emit_binary (intrins, cfg, cmethod, args); + /* The end result is in the lowest element */ + return simd_intrinsic_emit_getter_op (cfg, 0, cmethod->klass, mono_method_signature (cmethod)->ret, ins); + } + break; + case SN_Abs: { + // abs(x) = max(x, sub(0,x)) + MonoInst *sub; + MonoInst *zero; + + if (!(fsig->param_count == 1 && fsig->ret == type && fsig->params [0] == type)) + break; + + MONO_INST_NEW (cfg, zero, OP_XZERO); + zero->dreg = alloc_xreg (cfg); + zero->klass = cmethod->klass; + MONO_ADD_INS (cfg->cbb, zero); + + sub = simd_intrinsic_emit_binary_op (cfg, OP_SUBPS, 0, cmethod->klass, sig->params [0], sig->params [0], zero, args [0]); + return simd_intrinsic_emit_binary_op (cfg,
OP_MAXPS, 0, cmethod->klass, sig->params [0], sig->params [0], args [0], sub); + } + case SN_Max: + case SN_Min: + case SN_op_Addition: + case SN_op_Division: + case SN_op_Multiply: + case SN_op_Subtraction: + /* two operands, each either the vector type itself or a scalar R4; the result must be the vector type */ if (!(fsig->param_count == 2 && fsig->ret == type && (fsig->params [0] == type || fsig->params [0]->type == MONO_TYPE_R4) && (fsig->params [1] == type || fsig->params [1]->type == MONO_TYPE_R4))) + break; + return simd_intrinsic_emit_binary (intrins, cfg, cmethod, args); + default: + break; + } + + assert_handled (cfg, cmethod); + if (cfg->verbose_level > 1) { + char *name = mono_method_full_name (cmethod, TRUE); + printf (" SIMD method %s not handled.\n", name); + g_free (name); + } + return NULL; +} + /* Emits an I4 constant: 1 when simd_supported_versions is non-zero, otherwise 0. */ +static MonoInst* +emit_vector_is_hardware_accelerated_intrinsic (MonoCompile *cfg) +{ + MonoInst *ins; + + if (simd_supported_versions) + EMIT_NEW_ICONST (cfg, ins, 1); + else + EMIT_NEW_ICONST (cfg, ins, 0); + ins->type = STACK_I4; + return ins; +} + +/* These should be ordered by name */ +static const SimdIntrinsic vector_t_intrinsics[] = { + { SN_ctor }, + { SN_Abs }, + { SN_CopyTo }, + { SN_Equals }, + { SN_GreaterThan }, + { SN_GreaterThanOrEqual }, + { SN_LessThan }, + { SN_LessThanOrEqual }, + { SN_Max }, + { SN_Min }, + { SN_get_AllOnes, OP_XONES }, + { SN_get_Count }, + { SN_get_Item }, + { SN_get_Zero, OP_XZERO }, + { SN_op_Addition }, + { SN_op_BitwiseAnd }, + { SN_op_BitwiseOr }, + { SN_op_Division }, + { SN_op_ExclusiveOr }, + { SN_op_Explicit }, + { SN_op_Multiply }, + { SN_op_Subtraction } +}; + /* emit_vector_t_intrinsics: looks cmethod->name up in vector_t_intrinsics and emits SIMD code chosen from the element type, which is read from the first type argument of the class generic context. Returns NULL when the method is not handled. */ +static MonoInst* +emit_vector_t_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) +{ + const SimdIntrinsic *intrins; + MonoType *type, *etype; + MonoInst *ins; + int size, len, index; + + intrins = (const SimdIntrinsic*)mono_binary_search (cmethod->name, vector_t_intrinsics, sizeof (vector_t_intrinsics) / sizeof (SimdIntrinsic), sizeof (SimdIntrinsic), &simd_intrinsic_compare_by_name); + if (!intrins) { +
assert_handled (cfg, cmethod); + return NULL; + } + + type = &cmethod->klass->byval_arg; + etype = mono_class_get_context (cmethod->klass)->class_inst->type_argv [0]; + size = mono_class_value_size (mono_class_from_mono_type (etype), NULL); + g_assert (size); + len = 16 / size; /* element count: how many values of the element size fit in 16 bytes */ + + if (!MONO_TYPE_IS_PRIMITIVE (etype)) + return NULL; + + if (cfg->verbose_level > 1) { + char *name = mono_method_full_name (cmethod, TRUE); + printf (" SIMD intrinsic %s\n", name); + g_free (name); + } + + switch (intrins->name) { + case SN_get_Count: + if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4)) + break; + EMIT_NEW_ICONST (cfg, ins, len); + return ins; + case SN_get_AllOnes: + case SN_get_Zero: + if (!(fsig->param_count == 0 && mono_metadata_type_equal (fsig->ret, type))) + break; + return simd_intrinsic_emit_const (intrins, cfg, cmethod, args); + case SN_get_Item: + g_assert (fsig->param_count == 1); + /* only a constant index in [0, len) is handled; anything else returns NULL */ if (args [1]->opcode != OP_ICONST) + return NULL; + index = args [1]->inst_c0; + if (index < 0 || index >= len) + return NULL; + return simd_intrinsic_emit_getter_op (cfg, index, cmethod->klass, etype, args [0]); + case SN_ctor: + if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)) + return simd_intrinsic_emit_ctor (NULL, cfg, cmethod, args); + if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) { + MonoInst *array_ins = args [1]; + MonoInst *index_ins; + MonoInst *ldelema_ins; + MonoInst *var; + int end_index_reg; + + if (args [0]->opcode != OP_LDADDR) + return NULL; + + /* .ctor (T[]) or .ctor (T[], index) */ + + if (fsig->param_count == 2) { + index_ins = args [2]; + } else { + EMIT_NEW_ICONST (cfg, index_ins, 0); + } + + /* Emit index check for the end (index + len - 1 < array length) */ + end_index_reg = alloc_ireg (cfg); + EMIT_NEW_BIALU_IMM (cfg, ins, OP_IADD_IMM, end_index_reg, index_ins->dreg, len - 1); + MONO_EMIT_BOUNDS_CHECK (cfg, array_ins->dreg, MonoArray, max_length,
end_index_reg); + + /* Load the array slice into the simd reg */ + ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type (etype), array_ins, index_ins, TRUE); + g_assert (args [0]->opcode == OP_LDADDR); + var = args [0]->inst_p0; + EMIT_NEW_LOAD_MEMBASE (cfg, ins, OP_LOADX_MEMBASE, var->dreg, ldelema_ins->dreg, 0); + ins->klass = cmethod->klass; + return args [0]; + } + break; + case SN_op_Explicit: + return simd_intrinsic_emit_cast (intrins, cfg, cmethod, args); + case SN_Equals: + /* two shapes are accepted: one vector parameter with a boolean result, or two vector parameters with a vector result */ if (fsig->param_count == 1 && fsig->ret->type == MONO_TYPE_BOOLEAN && mono_metadata_type_equal (fsig->params [0], type)) + return simd_intrinsic_emit_equality_op (cfg, cmethod, args, type_to_comp_op (etype), SIMD_COMP_EQ); + if (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type)) + return simd_intrinsic_emit_binary_op (cfg, type_to_comp_op (etype), 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]); + break; + + case SN_GreaterThan: + case SN_GreaterThanOrEqual: + case SN_LessThan: + case SN_LessThanOrEqual: { + MonoInst *cmp1, *cmp2; + int eq_op, gt_op; + + /* only the signed integer element types I1, I2, I4 and I8 are handled */ switch (etype->type) { + case MONO_TYPE_I1: + case MONO_TYPE_I2: + case MONO_TYPE_I4: + case MONO_TYPE_I8: + break; + default: + return NULL; + } + + eq_op = type_to_comp_op (etype); + gt_op = type_to_gt_op (etype); + + switch (intrins->name) { + case SN_GreaterThan: + return simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]); + case SN_LessThan: + /* a < b is emitted as b > a (operands swapped) */ return simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [1], args [0]); + case SN_LessThanOrEqual: + /* a <= b is emitted as (b == a) OR (b > a) */ cmp1 = simd_intrinsic_emit_binary_op (cfg, eq_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [1], args [0]); + cmp2 = simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params
[0], fsig->params [1], args [1], args [0]); + return simd_intrinsic_emit_binary_op (cfg, OP_POR, 0, cmethod->klass, fsig->params [0], fsig->params [1], cmp1, cmp2); + case SN_GreaterThanOrEqual: + /* a >= b is emitted as (a == b) OR (a > b) */ cmp1 = simd_intrinsic_emit_binary_op (cfg, eq_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]); + cmp2 = simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]); + return simd_intrinsic_emit_binary_op (cfg, OP_POR, 0, cmethod->klass, fsig->params [0], fsig->params [1], cmp1, cmp2); + default: + g_assert_not_reached (); + break; + } + } + case SN_Abs: + /* Vector.Abs */ + /* only unsigned element types are handled here, as a plain OP_XMOVE; other element types break out as not handled */ switch (etype->type) { + case MONO_TYPE_U1: + case MONO_TYPE_U2: + case MONO_TYPE_U4: + case MONO_TYPE_U8: { + MonoInst *ins; + + /* No-op */ + MONO_INST_NEW (cfg, ins, OP_XMOVE); + ins->klass = cmethod->klass; + ins->type = STACK_VTYPE; + ins->sreg1 = args [0]->dreg; + ins->dreg = alloc_xreg (cfg); + MONO_ADD_INS (cfg->cbb, ins); + return ins; + } + default: + break; + } + break; + case SN_op_Addition: + case SN_op_Subtraction: + case SN_op_Multiply: + case SN_op_Division: + case SN_op_ExclusiveOr: + case SN_op_BitwiseAnd: + case SN_op_BitwiseOr: + case SN_Max: + case SN_Min: { + if (!(fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, fsig->params [0]) && mono_metadata_type_equal (fsig->params [0], fsig->params [1]))) + break; + int op = 0; + switch (intrins->name) { + case SN_op_Addition: + op = type_to_padd_op (etype); + break; + case SN_op_Subtraction: + op = type_to_psub_op (etype); + break; + case SN_op_Multiply: + op = type_to_pmul_op (etype); + break; + case SN_op_Division: + op = type_to_pdiv_op (etype); + break; + case SN_op_ExclusiveOr: + op = type_to_pxor_op (etype); + break; + case SN_op_BitwiseAnd: + op = type_to_pand_op (etype); + break; + case SN_op_BitwiseOr: + op = type_to_por_op (etype); + break; + case SN_Min: + op = type_to_pmin_op (etype); + break; + case SN_Max: + op =
type_to_pmax_op (etype); + break; + default: + g_assert_not_reached (); + } + /* NOTE(review): presumably the type_to_..._op helpers return -1 when there is no opcode for the element type - confirm */ if (op != -1) + return simd_intrinsic_emit_binary_op (cfg, op, 0, cmethod->klass, fsig->params [0], fsig->params [0], args [0], args [1]); + break; + } + case SN_CopyTo: { + /* NOTE(review): args [2] is read without checking fsig->param_count - confirm that a one-argument CopyTo overload cannot reach this case */ MonoInst *array_ins = args [1]; + MonoInst *index_ins = args [2]; + MonoInst *ldelema_ins; + MonoInst *var; + int end_index_reg; + + if (args [0]->opcode != OP_LDADDR) + return NULL; + + /* Emit index check for the end (index + len - 1 < array length) */ + end_index_reg = alloc_ireg (cfg); + EMIT_NEW_BIALU_IMM (cfg, ins, OP_IADD_IMM, end_index_reg, index_ins->dreg, len - 1); + + int length_reg = alloc_ireg (cfg); + MONO_EMIT_NEW_LOAD_MEMBASE_OP_FAULT (cfg, OP_LOADI4_MEMBASE, length_reg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length)); + MONO_EMIT_NEW_BIALU (cfg, OP_COMPARE, -1, length_reg, end_index_reg); + /* raises ArgumentException when the array length is less than or equal to index + len - 1, compared as unsigned */ MONO_EMIT_NEW_COND_EXC (cfg, LE_UN, "ArgumentException"); + + /* Load the simd reg into the array slice */ + ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type (etype), array_ins, index_ins, TRUE); + g_assert (args [0]->opcode == OP_LDADDR); + var = args [0]->inst_p0; + EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_MEMBASE, ldelema_ins->dreg, 0, var->dreg); + ins->klass = cmethod->klass; + return args [0]; + break; + } + default: + break; + } + + assert_handled (cfg, cmethod); + + if (cfg->verbose_level > 1) { + char *name = mono_method_full_name (cmethod, TRUE); + printf (" SIMD method %s not handled.\n", name); + g_free (name); + } + + return NULL; +} + +/* + * emit_sys_numerics_intrinsics: + * + * Emit intrinsics for the System.Numerics assembly.
+ */ +static MonoInst* +emit_sys_numerics_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) +{ + const char *nspace = cmethod->klass->name_space; + const char *class_name = cmethod->klass->name; + + if (!strcmp ("Vector2", class_name) || !strcmp ("Vector4", class_name) || !strcmp ("Vector3", class_name)) + return emit_vector_intrinsics (cfg, cmethod, fsig, args); + + if (!strcmp ("System.Numerics", nspace) && !strcmp ("Vector", class_name)) { + if (!strcmp (cmethod->name, "get_IsHardwareAccelerated")) + return emit_vector_is_hardware_accelerated_intrinsic (cfg); + } + + return NULL; +} + +static MonoInst* +emit_sys_numerics_vectors_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) +{ + const char *nspace = cmethod->klass->name_space; + const char *class_name = cmethod->klass->name; + + if (!strcmp (class_name, "Vector`1")) + return emit_vector_t_intrinsics (cfg, cmethod, fsig, args); + + if (!strcmp ("System.Numerics", nspace) && !strcmp ("Vector", class_name)) { + if (!strcmp (cmethod->name, "get_IsHardwareAccelerated")) + return emit_vector_is_hardware_accelerated_intrinsic (cfg); + } + + return NULL; +} + +MonoInst* +mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr) +{ + if (is_sys_numerics_assembly (field->parent->image->assembly)) { + int index = -1; + + if (!strcmp (field->parent->name, "Vector2") || + !strcmp (field->parent->name, "Vector3") || + !strcmp (field->parent->name, "Vector4")) { + if (!strcmp (field->name, "X")) + index = 0; + else if (!strcmp (field->name, "Y")) + index = 1; + else if (!strcmp (field->name, "Z")) + index = 2; + else if (!strcmp (field->name, "W")) + index = 3; + } + + if (index != -1) { + if (cfg->verbose_level > 1) + printf (" SIMD intrinsic field access: %s\n", field->name); + + return simd_intrinsic_emit_getter_op (cfg, index, field->parent, mono_field_get_type (field), addr); + } + } return NULL; } 
#endif /* DISABLE_JIT */ + +#else + +MonoInst* +mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr) +{ + return NULL; +} + #endif /* MONO_ARCH_SIMD_INTRINSICS */