#include "mini.h"
#include "ir-emit.h"
+#include "mono/utils/bsearch.h"
+#include <mono/metadata/abi-details.h>
/*
General notes on SIMD intrinsics
TODO add stuff to man pages
TODO document this under /docs
TODO make passing a xmm as argument not cause it to be LDADDR'ed (introduce an OP_XPUSH)
-TODO revamp the .ctor sequence as it looks very fragile, maybe use a var just like iconv_to_r8_raw. (or just pinst sse ops)
+TODO revamp the .ctor sequence as it looks very fragile, maybe use a var just like move_i4_to_f. (or just pinst sse ops)
TODO figure out what's wrong with OP_STOREX_MEMBASE_REG and OP_STOREX_MEMBASE (the 2nd is for imm operands)
TODO maybe add SSE3 emulation on top of SSE2, or just implement the corresponding functions using SSE2 intrinsics.
TODO pass simd arguments in registers or, at least, add SSE support for pushing large (>=16) valuetypes
without a OP_LDADDR.
*/
-#ifdef MONO_ARCH_SIMD_INTRINSICS
+#if defined (MONO_ARCH_SIMD_INTRINSICS)
+
+#if defined (DISABLE_JIT)
+
+/* No-op stub: when the JIT is compiled out (DISABLE_JIT) there is no SIMD
+ * intrinsics machinery to set up, but callers still expect this entry point. */
+void
+mono_simd_intrinsics_init (void)
+{
+}
+
+#else
//#define IS_DEBUG_ON(cfg) (0)
{ SN_CompareNotLessThan, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLT },
{ SN_CompareOrdered, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_ORD },
{ SN_CompareUnordered, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_UNORD },
+ { SN_ConvertToDouble, OP_CVTPS2PD, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
+ { SN_ConvertToInt, OP_CVTPS2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
+ { SN_ConvertToIntTruncated, OP_CVTTPS2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
{ SN_DuplicateHigh, OP_DUPPS_HIGH, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY },
{ SN_DuplicateLow, OP_DUPPS_LOW, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY },
{ SN_HorizontalAdd, OP_HADDPS, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
{ SN_CompareNotLessThan, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLT },
{ SN_CompareOrdered, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_ORD },
{ SN_CompareUnordered, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_UNORD },
+ { SN_ConvertToFloat, OP_CVTPD2PS, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
+ { SN_ConvertToInt, OP_CVTPD2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
+ { SN_ConvertToIntTruncated, OP_CVTTPD2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
{ SN_Duplicate, OP_DUPPD, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY },
{ SN_HorizontalAdd, OP_HADDPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
{ SN_HorizontalSub, OP_HSUBPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
{ SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
+ { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
{ SN_Sqrt, OP_SQRTPD, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY },
{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
{ SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
{ SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
+ { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
{ SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
{ SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
{ SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
+ { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
{ SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
{ SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
{ SN_ctor, OP_EXPAND_I4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
{ SN_CompareEqual, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
{ SN_CompareGreaterThan, OP_PCMPGTD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
+ { SN_ConvertToDouble, OP_CVTDQ2PD, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
+ { SN_ConvertToFloat, OP_CVTDQ2PS, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
{ SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
{ SN_LogicalRightShift, OP_PSHRD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
{ SN_Max, OP_PMAXD, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
num_sregs = mono_inst_get_src_registers (ins, sregs);
for (j = 0; j < num_sregs; ++j) {
- if (sregs [i] == var->dreg)
+ if (sregs [j] == var->dreg)
found = TRUE;
}
/*We can avoid inserting the XZERO if the first use doesn't depend on the zero'ed value.*/
if (ins->dreg == var->dreg && !found) {
+ DEBUG (printf ("[simd-simplify] INGORING R%d on BB %d because first op is a def", i, target_bb [var->dreg]->block_num););
break;
} else if (found) {
+ DEBUG (printf ("[simd-simplify] Adding XZERO for R%d on BB %d: ", i, target_bb [var->dreg]->block_num); );
MonoInst *tmp;
MONO_INST_NEW (cfg, tmp, OP_XZERO);
tmp->dreg = var->dreg;
}
for (ins = first_bb->code; ins; ins = ins->next) {
- if (ins->opcode == OP_XZERO && (vreg_flags [ins->dreg] & VREG_SINGLE_BB_USE))
+ if (ins->opcode == OP_XZERO && (vreg_flags [ins->dreg] & VREG_SINGLE_BB_USE)) {
+ DEBUG (printf ("[simd-simplify] Nullify %d on first BB: ", ins->dreg); mono_print_ins(ins));
NULLIFY_INS (ins);
+ }
}
g_free (vreg_flags);
g_assert_not_reached ();
}
-static MonoInst*
-get_int_to_float_spill_area (MonoCompile *cfg)
-{
- if (!cfg->iconv_raw_var) {
- cfg->iconv_raw_var = mono_compile_create_var (cfg, &mono_defaults.int32_class->byval_arg, OP_LOCAL);
- cfg->iconv_raw_var->flags |= MONO_INST_VOLATILE; /*FIXME, use the don't regalloc flag*/
- }
- return cfg->iconv_raw_var;
-}
-
/*We share the var with fconv_to_r8_x to save some stack space.*/
static MonoInst*
get_double_spill_area (MonoCompile *cfg)
return OP_EXPAND_R4;
case MONO_TYPE_R8:
return OP_EXPAND_R8;
+ default:
+ g_assert_not_reached ();
}
- g_assert_not_reached ();
}
static int
MONO_ADD_INS (cfg->cbb, ins);
if (expand_op == OP_EXPAND_R4)
- ins->backend.spill_var = get_int_to_float_spill_area (cfg);
+ ins->backend.spill_var = mini_get_int_to_float_spill_area (cfg);
else if (expand_op == OP_EXPAND_R8)
ins->backend.spill_var = get_double_spill_area (cfg);
case MONO_TYPE_U4:
case MONO_TYPE_R4:
return OP_EXTRACT_I4;
+ default:
+ g_assert_not_reached ();
}
- g_assert_not_reached ();
}
/*Returns the amount to shift the element index to get the dword it belongs to*/
case MONO_TYPE_U4:
case MONO_TYPE_R4:
return 0;
+ default:
+ g_assert_not_reached ();
+ }
+}
+
+/*
+ * Map a scalar element type to the OP_INSERT_* opcode that inserts a value of
+ * that type into a lane of a SIMD register.  Signed/unsigned pairs share the
+ * same opcode since the insert is a raw bit move.  Aborts on any type that
+ * has no insert opcode (the caller must only pass supported element types).
+ * Marked G_GNUC_UNUSED because it is only referenced on some build paths.
+ */
+static G_GNUC_UNUSED int
+mono_type_to_insert_op (MonoType *type)
+{
+ switch (type->type) {
+ case MONO_TYPE_I1:
+ case MONO_TYPE_U1:
+ return OP_INSERT_I1;
+ case MONO_TYPE_I2:
+ case MONO_TYPE_U2:
+ return OP_INSERT_I2;
+ case MONO_TYPE_I4:
+ case MONO_TYPE_U4:
+ return OP_INSERT_I4;
+ case MONO_TYPE_I8:
+ case MONO_TYPE_U8:
+ return OP_INSERT_I8;
+ case MONO_TYPE_R4:
+ return OP_INSERT_R4;
+ case MONO_TYPE_R8:
+ return OP_INSERT_R8;
+ default:
+ g_assert_not_reached ();
+ }
- g_assert_not_reached ();
}
static int
return OP_INSERTX_R4_SLOW;
case MONO_TYPE_R8:
return OP_INSERTX_R8_SLOW;
+ default:
+ g_assert_not_reached ();
}
- g_assert_not_reached ();
}
static MonoInst*
size = mono_type_size (sig->params [0], &align);
- if (size == 2 || size == 4 || size == 8) {
+ if (COMPILE_LLVM (cfg)) {
+ MONO_INST_NEW (cfg, ins, mono_type_to_insert_op (sig->params [0]));
+ ins->klass = cmethod->klass;
+ ins->dreg = ins->sreg1 = dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);
+ ins->sreg2 = args [1]->dreg;
+ ins->inst_c0 = intrinsic->opcode;
+ MONO_ADD_INS (cfg->cbb, ins);
+ } else if (size == 2 || size == 4 || size == 8) {
MONO_INST_NEW (cfg, ins, mono_type_to_slow_insert_op (sig->params [0]));
ins->klass = cmethod->klass;
/*This is a partial load so we encode the dependency on the previous value by setting dreg and sreg1 to the same value.*/
ins->sreg2 = args [1]->dreg;
ins->inst_c0 = intrinsic->opcode;
if (sig->params [0]->type == MONO_TYPE_R4)
- ins->backend.spill_var = get_int_to_float_spill_area (cfg);
+ ins->backend.spill_var = mini_get_int_to_float_spill_area (cfg);
else if (sig->params [0]->type == MONO_TYPE_R8)
ins->backend.spill_var = get_double_spill_area (cfg);
MONO_ADD_INS (cfg->cbb, ins);
MONO_ADD_INS (cfg->cbb, ins);
if (sig->ret->type == MONO_TYPE_R4) {
- MONO_INST_NEW (cfg, ins, OP_ICONV_TO_R8_RAW);
+ MONO_INST_NEW (cfg, ins, cfg->r4fp ? OP_ICONV_TO_R4_RAW : OP_MOVE_I4_TO_F);
ins->klass = mono_defaults.single_class;
ins->sreg1 = vreg;
- ins->type = STACK_R8;
+ ins->type = cfg->r4_stack_type;
ins->dreg = alloc_freg (cfg);
- ins->backend.spill_var = get_int_to_float_spill_area (cfg);
- MONO_ADD_INS (cfg->cbb, ins);
+ ins->backend.spill_var = mini_get_int_to_float_spill_area (cfg);
+ MONO_ADD_INS (cfg->cbb, ins);
}
return ins;
}
MONO_ADD_INS (cfg->cbb, ins);
if (sig->params [0]->type == MONO_TYPE_R4)
- ins->backend.spill_var = get_int_to_float_spill_area (cfg);
+ ins->backend.spill_var = mini_get_int_to_float_spill_area (cfg);
else if (sig->params [0]->type == MONO_TYPE_R8)
ins->backend.spill_var = get_double_spill_area (cfg);
simd_intrinsic_emit_shuffle (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
{
MonoInst *ins;
- int vreg;
-
- /*TODO Exposing shuffle is not a good thing as it's non obvious. We should come up with better abstractions*/
+ int vreg, vreg2 = -1;
+ int param_count = mono_method_signature (cmethod)->param_count;
- if (args [1]->opcode != OP_ICONST) {
+ if (args [param_count - 1]->opcode != OP_ICONST) {
/*TODO Shuffle with non literals is not yet supported */
return NULL;
}
+
vreg = get_simd_vreg (cfg, cmethod, args [0]);
- NULLIFY_INS (args [1]);
+ if (param_count == 3)
+ vreg2 = get_simd_vreg (cfg, cmethod, args [1]);
+
+ NULLIFY_INS (args [param_count - 1]);
+
MONO_INST_NEW (cfg, ins, intrinsic->opcode);
ins->klass = cmethod->klass;
ins->sreg1 = vreg;
- ins->inst_c0 = args [1]->inst_c0;
+ ins->sreg2 = vreg2;
+ ins->inst_c0 = args [param_count - 1]->inst_c0;
ins->type = STACK_VTYPE;
ins->dreg = alloc_ireg (cfg);
MONO_ADD_INS (cfg->cbb, ins);
+
+ if (param_count == 3 && ins->opcode == OP_PSHUFLED)
+ ins->opcode = OP_SHUFPS;
return ins;
}
static MonoInst*
emit_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args, const SimdIntrinsc *intrinsics, guint32 size)
{
- const SimdIntrinsc * result = bsearch (cmethod->name, intrinsics, size, sizeof (SimdIntrinsc), &simd_intrinsic_compare_by_name);
+ const SimdIntrinsc * result = mono_binary_search (cmethod->name, intrinsics, size, sizeof (SimdIntrinsc), &simd_intrinsic_compare_by_name);
if (!result) {
DEBUG (printf ("function doesn't have a simd intrinsic %s::%s/%d\n", cmethod->klass->name, cmethod->name, fsig->param_count));
return NULL;
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_MUL_IMM, mult_reg, index2_reg, size);
MONO_EMIT_NEW_BIALU (cfg, OP_PADD, add_reg, array_reg, mult_reg);
- NEW_BIALU_IMM (cfg, ins, OP_PADD_IMM, add_reg, add_reg, G_STRUCT_OFFSET (MonoArray, vector));
+ NEW_BIALU_IMM (cfg, ins, OP_PADD_IMM, add_reg, add_reg, MONO_STRUCT_OFFSET (MonoArray, vector));
ins->type = STACK_PTR;
MONO_ADD_INS (cfg->cbb, ins);
static MonoInst*
emit_array_extension_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
- if (!strcmp ("GetVector", cmethod->name) || !strcmp ("GetVectorAligned", cmethod->name)) {
+ if ((!strcmp ("GetVector", cmethod->name) || !strcmp ("GetVectorAligned", cmethod->name)) && fsig->param_count == 2) {
MonoInst *load;
int addr = mono_emit_vector_ldelema (cfg, fsig->params [0], args [0], args [1], TRUE);
return load;
}
- if (!strcmp ("SetVector", cmethod->name) || !strcmp ("SetVectorAligned", cmethod->name)) {
+ if ((!strcmp ("SetVector", cmethod->name) || !strcmp ("SetVectorAligned", cmethod->name)) && fsig->param_count == 3) {
MonoInst *store;
int vreg = get_simd_vreg (cfg, cmethod, args [1]);
int addr = mono_emit_vector_ldelema (cfg, fsig->params [0], args [0], args [2], TRUE);
return store;
}
- if (!strcmp ("IsAligned", cmethod->name)) {
+ if (!strcmp ("IsAligned", cmethod->name) && fsig->param_count == 2) {
MonoInst *ins;
int addr = mono_emit_vector_ldelema (cfg, fsig->params [0], args [0], args [1], FALSE);
static MonoInst*
emit_simd_runtime_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
- if (!strcmp ("get_AccelMode", cmethod->name)) {
+ if (!strcmp ("get_AccelMode", cmethod->name) && fsig->param_count == 0) {
MonoInst *ins;
EMIT_NEW_ICONST (cfg, ins, simd_supported_versions);
return ins;
{
const char *class_name;
- if (strcmp ("Mono.Simd", cmethod->klass->name_space))
+ if (strcmp ("Mono.Simd", cmethod->klass->image->assembly->aname.name) ||
+ strcmp ("Mono.Simd", cmethod->klass->name_space))
return NULL;
class_name = cmethod->klass->name;
return NULL;
}
-#endif
+#endif /* DISABLE_JIT */
+#endif /* MONO_ARCH_SIMD_INTRINSICS */