#include "mini.h"
#include "ir-emit.h"
+#include "mono/utils/bsearch.h"
+#include <mono/metadata/abi-details.h>
/*
General notes on SIMD intrinsics
{ SN_CompareNotLessThan, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLT },
{ SN_CompareOrdered, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_ORD },
{ SN_CompareUnordered, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_UNORD },
+ { SN_ConvertToDouble, OP_CVTPS2PD, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
+ { SN_ConvertToInt, OP_CVTPS2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
+ { SN_ConvertToIntTruncated, OP_CVTTPS2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
{ SN_DuplicateHigh, OP_DUPPS_HIGH, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY },
{ SN_DuplicateLow, OP_DUPPS_LOW, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY },
{ SN_HorizontalAdd, OP_HADDPS, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
{ SN_CompareNotLessThan, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLT },
{ SN_CompareOrdered, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_ORD },
{ SN_CompareUnordered, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_UNORD },
+ { SN_ConvertToFloat, OP_CVTPD2PS, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
+ { SN_ConvertToInt, OP_CVTPD2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
+ { SN_ConvertToIntTruncated, OP_CVTTPD2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
{ SN_Duplicate, OP_DUPPD, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY },
{ SN_HorizontalAdd, OP_HADDPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
{ SN_HorizontalSub, OP_HSUBPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
{ SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
+ { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
{ SN_Sqrt, OP_SQRTPD, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY },
{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
{ SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
{ SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
+ { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
{ SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
{ SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
{ SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
{ SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
{ SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
+ { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
{ SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
{ SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
{ SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
{ SN_ctor, OP_EXPAND_I4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
{ SN_CompareEqual, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
{ SN_CompareGreaterThan, OP_PCMPGTD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
+ { SN_ConvertToDouble, OP_CVTDQ2PD, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
+ { SN_ConvertToFloat, OP_CVTDQ2PS, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
{ SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
{ SN_LogicalRightShift, OP_PSHRD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
{ SN_Max, OP_PMAXD, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
return OP_EXPAND_R4;
case MONO_TYPE_R8:
return OP_EXPAND_R8;
+ default:
+ g_assert_not_reached ();
}
- g_assert_not_reached ();
}
static int
case MONO_TYPE_U4:
case MONO_TYPE_R4:
return OP_EXTRACT_I4;
+ default:
+ g_assert_not_reached ();
}
- g_assert_not_reached ();
}
/*Returns the amount to shift the element index to get the dword it belongs to*/
case MONO_TYPE_U4:
case MONO_TYPE_R4:
return 0;
+ default:
+ g_assert_not_reached ();
+ }
+}
+
+static G_GNUC_UNUSED int
+mono_type_to_insert_op (MonoType *type)
+{
+ switch (type->type) {
+ case MONO_TYPE_I1:
+ case MONO_TYPE_U1:
+ return OP_INSERT_I1;
+ case MONO_TYPE_I2:
+ case MONO_TYPE_U2:
+ return OP_INSERT_I2;
+ case MONO_TYPE_I4:
+ case MONO_TYPE_U4:
+ return OP_INSERT_I4;
+ case MONO_TYPE_I8:
+ case MONO_TYPE_U8:
+ return OP_INSERT_I8;
+ case MONO_TYPE_R4:
+ return OP_INSERT_R4;
+ case MONO_TYPE_R8:
+ return OP_INSERT_R8;
+ default:
+ g_assert_not_reached ();
}
- g_assert_not_reached ();
}
static int
return OP_INSERTX_R4_SLOW;
case MONO_TYPE_R8:
return OP_INSERTX_R8_SLOW;
+ default:
+ g_assert_not_reached ();
}
- g_assert_not_reached ();
}
static MonoInst*
size = mono_type_size (sig->params [0], &align);
- if (size == 2 || size == 4 || size == 8) {
+ if (COMPILE_LLVM (cfg)) {
+ MONO_INST_NEW (cfg, ins, mono_type_to_insert_op (sig->params [0]));
+ ins->klass = cmethod->klass;
+ ins->dreg = ins->sreg1 = dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);
+ ins->sreg2 = args [1]->dreg;
+ ins->inst_c0 = intrinsic->opcode;
+ MONO_ADD_INS (cfg->cbb, ins);
+ } else if (size == 2 || size == 4 || size == 8) {
MONO_INST_NEW (cfg, ins, mono_type_to_slow_insert_op (sig->params [0]));
ins->klass = cmethod->klass;
/*This is a partial load so we encode the dependency on the previous value by setting dreg and sreg1 to the same value.*/
simd_intrinsic_emit_shuffle (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
{
MonoInst *ins;
- int vreg;
-
- /*TODO Exposing shuffle is not a good thing as it's non obvious. We should come up with better abstractions*/
+ int vreg, vreg2 = -1;
+ int param_count = mono_method_signature (cmethod)->param_count;
- if (args [1]->opcode != OP_ICONST) {
+ if (args [param_count - 1]->opcode != OP_ICONST) {
/*TODO Shuffle with non literals is not yet supported */
return NULL;
}
+
vreg = get_simd_vreg (cfg, cmethod, args [0]);
- NULLIFY_INS (args [1]);
+ if (param_count == 3)
+ vreg2 = get_simd_vreg (cfg, cmethod, args [1]);
+
+ NULLIFY_INS (args [param_count - 1]);
+
MONO_INST_NEW (cfg, ins, intrinsic->opcode);
ins->klass = cmethod->klass;
ins->sreg1 = vreg;
- ins->inst_c0 = args [1]->inst_c0;
+ ins->sreg2 = vreg2;
+ ins->inst_c0 = args [param_count - 1]->inst_c0;
ins->type = STACK_VTYPE;
ins->dreg = alloc_ireg (cfg);
MONO_ADD_INS (cfg->cbb, ins);
+
+ if (param_count == 3 && ins->opcode == OP_PSHUFLED)
+ ins->opcode = OP_SHUFPS;
return ins;
}
static MonoInst*
emit_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args, const SimdIntrinsc *intrinsics, guint32 size)
{
- const SimdIntrinsc * result = bsearch (cmethod->name, intrinsics, size, sizeof (SimdIntrinsc), &simd_intrinsic_compare_by_name);
+ const SimdIntrinsc * result = mono_binary_search (cmethod->name, intrinsics, size, sizeof (SimdIntrinsc), &simd_intrinsic_compare_by_name);
if (!result) {
DEBUG (printf ("function doesn't have a simd intrinsic %s::%s/%d\n", cmethod->klass->name, cmethod->name, fsig->param_count));
return NULL;
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_MUL_IMM, mult_reg, index2_reg, size);
MONO_EMIT_NEW_BIALU (cfg, OP_PADD, add_reg, array_reg, mult_reg);
- NEW_BIALU_IMM (cfg, ins, OP_PADD_IMM, add_reg, add_reg, G_STRUCT_OFFSET (MonoArray, vector));
+ NEW_BIALU_IMM (cfg, ins, OP_PADD_IMM, add_reg, add_reg, MONO_STRUCT_OFFSET (MonoArray, vector));
ins->type = STACK_PTR;
MONO_ADD_INS (cfg->cbb, ins);