X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mono%2Fmini%2Fsimd-intrinsics.c;h=aaf6a1935df772c8d327bbe61f6b49cd49f27d1b;hb=6c5018fd88223dd357d8dec1a7dd65258e807290;hp=c81619d7cc4688417de834b2d0b7c87813b8bdb9;hpb=a5e40870bd3bb18e1681afed6c71e7edfdb80534;p=mono.git diff --git a/mono/mini/simd-intrinsics.c b/mono/mini/simd-intrinsics.c index c81619d7cc4..aaf6a1935df 100644 --- a/mono/mini/simd-intrinsics.c +++ b/mono/mini/simd-intrinsics.c @@ -121,391 +121,392 @@ enum { typedef struct { guint16 name; guint16 opcode; + guint8 simd_version_flags; guint8 simd_emit_mode : 4; - guint8 simd_version : 4; - guint8 flags; + guint8 flags : 4; } SimdIntrinsc; static const SimdIntrinsc vector4f_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, - { SN_AddSub, OP_ADDSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 }, - { SN_AndNot, OP_ANDNPS, SIMD_EMIT_BINARY }, - { SN_CompareEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ }, - { SN_CompareLessEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LE }, - { SN_CompareLessThan, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LT }, - { SN_CompareNotEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ }, - { SN_CompareNotLessEqual, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NLE }, - { SN_CompareNotLessThan, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NLT }, - { SN_CompareOrdered, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_ORD }, - { SN_CompareUnordered, OP_COMPPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_UNORD }, - { SN_DuplicateHigh, OP_DUPPS_HIGH, SIMD_EMIT_UNARY, SIMD_VERSION_SSE3 }, - { SN_DuplicateLow, OP_DUPPS_LOW, SIMD_EMIT_UNARY, SIMD_VERSION_SSE3 }, - { SN_HorizontalAdd, OP_HADDPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 }, - { SN_HorizontalSub, OP_HSUBPS, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 }, - { SN_InterleaveHigh, OP_UNPACK_HIGHPS, SIMD_EMIT_BINARY }, - { SN_InterleaveLow, OP_UNPACK_LOWPS, SIMD_EMIT_BINARY 
}, - { SN_InvSqrt, OP_RSQRTPS, SIMD_EMIT_UNARY }, - { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, - { SN_Max, OP_MAXPS, SIMD_EMIT_BINARY }, - { SN_Min, OP_MINPS, SIMD_EMIT_BINARY }, - { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 }, - { SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 }, - { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 }, - { SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA }, - { SN_Reciprocal, OP_RCPPS, SIMD_EMIT_UNARY }, - { SN_Shuffle, OP_PSHUFLED, SIMD_EMIT_SHUFFLE }, - { SN_Sqrt, OP_SQRTPS, SIMD_EMIT_UNARY }, - { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE }, - { SN_StoreNonTemporal, OP_STOREX_NTA_MEMBASE_REG, SIMD_EMIT_STORE }, - { SN_get_W, 3, SIMD_EMIT_GETTER }, - { SN_get_X, 0, SIMD_EMIT_GETTER }, - { SN_get_Y, 1, SIMD_EMIT_GETTER }, - { SN_get_Z, 2, SIMD_EMIT_GETTER }, - { SN_op_Addition, OP_ADDPS, SIMD_EMIT_BINARY }, - { SN_op_BitwiseAnd, OP_ANDPS, SIMD_EMIT_BINARY }, - { SN_op_BitwiseOr, OP_ORPS, SIMD_EMIT_BINARY }, - { SN_op_Division, OP_DIVPS, SIMD_EMIT_BINARY }, - { SN_op_Equality, OP_COMPPS, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ }, - { SN_op_ExclusiveOr, OP_XORPS, SIMD_EMIT_BINARY }, - { SN_op_Explicit, 0, SIMD_EMIT_CAST }, - { SN_op_Inequality, OP_COMPPS, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ }, - { SN_op_Multiply, OP_MULPS, SIMD_EMIT_BINARY }, - { SN_op_Subtraction, OP_SUBPS, SIMD_EMIT_BINARY }, - { SN_set_W, 3, SIMD_EMIT_SETTER }, - { SN_set_X, 0, SIMD_EMIT_SETTER }, - { SN_set_Y, 1, SIMD_EMIT_SETTER }, - { SN_set_Z, 2, SIMD_EMIT_SETTER }, + { SN_ctor, OP_EXPAND_R4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, + { SN_AddSub, OP_ADDSUBPS, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY}, + { SN_AndNot, OP_ANDNPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY}, + { SN_CompareEqual, OP_COMPPS, SIMD_VERSION_SSE1, 
SIMD_EMIT_BINARY, SIMD_COMP_EQ }, + { SN_CompareLessEqual, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_LE }, + { SN_CompareLessThan, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_LT }, + { SN_CompareNotEqual, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NEQ }, + { SN_CompareNotLessEqual, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLE }, + { SN_CompareNotLessThan, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLT }, + { SN_CompareOrdered, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_ORD }, + { SN_CompareUnordered, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_UNORD }, + { SN_DuplicateHigh, OP_DUPPS_HIGH, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY }, + { SN_DuplicateLow, OP_DUPPS_LOW, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY }, + { SN_HorizontalAdd, OP_HADDPS, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY }, + { SN_HorizontalSub, OP_HSUBPS, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY }, + { SN_InterleaveHigh, OP_UNPACK_HIGHPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_InterleaveLow, OP_UNPACK_LOWPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_InvSqrt, OP_RSQRTPS, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY }, + { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED }, + { SN_Max, OP_MAXPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_Min, OP_MINPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 }, + { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 }, + { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 }, + { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA }, + { SN_Reciprocal, OP_RCPPS, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY }, + { SN_Shuffle, OP_PSHUFLED, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE }, + { SN_Sqrt, OP_SQRTPS, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY }, + { SN_StoreAligned, 
OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE }, + { SN_StoreNonTemporal, OP_STOREX_NTA_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE }, + { SN_get_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_op_Addition, OP_ADDPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseAnd, OP_ANDPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseOr, OP_ORPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Division, OP_DIVPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Equality, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ }, + { SN_op_ExclusiveOr, OP_XORPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST }, + { SN_op_Inequality, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ }, + { SN_op_Multiply, OP_MULPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Subtraction, OP_SUBPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_set_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER } }; static const SimdIntrinsc vector2d_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, - { SN_AddSub, OP_ADDSUBPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 }, - { SN_AndNot, OP_ANDNPD, SIMD_EMIT_BINARY }, - { SN_CompareEqual, OP_COMPPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_EQ }, - { SN_CompareLessEqual, OP_COMPPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LE }, - { SN_CompareLessThan, OP_COMPPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_LT }, - { SN_CompareNotEqual, OP_COMPPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ }, - { SN_CompareNotLessEqual, OP_COMPPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NLE }, - { 
SN_CompareNotLessThan, OP_COMPPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_NLT }, - { SN_CompareOrdered, OP_COMPPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_ORD }, - { SN_CompareUnordered, OP_COMPPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1, SIMD_COMP_UNORD }, - { SN_Duplicate, OP_DUPPD, SIMD_EMIT_UNARY, SIMD_VERSION_SSE3 }, - { SN_HorizontalAdd, OP_HADDPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 }, - { SN_HorizontalSub, OP_HSUBPD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE3 }, - { SN_InterleaveHigh, OP_UNPACK_HIGHPD, SIMD_EMIT_BINARY }, - { SN_InterleaveLow, OP_UNPACK_LOWPD, SIMD_EMIT_BINARY }, - { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, - { SN_Max, OP_MAXPD, SIMD_EMIT_BINARY }, - { SN_Min, OP_MINPD, SIMD_EMIT_BINARY }, - { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 }, - { SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 }, - { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 }, - { SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA }, - { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE }, - { SN_get_X, 0, SIMD_EMIT_GETTER_QWORD }, - { SN_get_Y, 1, SIMD_EMIT_GETTER_QWORD }, - { SN_op_Addition, OP_ADDPD, SIMD_EMIT_BINARY }, - { SN_op_BitwiseAnd, OP_ANDPD, SIMD_EMIT_BINARY }, - { SN_op_BitwiseOr, OP_ORPD, SIMD_EMIT_BINARY }, - { SN_op_Division, OP_DIVPD, SIMD_EMIT_BINARY }, - { SN_op_ExclusiveOr, OP_XORPD, SIMD_EMIT_BINARY }, - { SN_op_Explicit, 0, SIMD_EMIT_CAST }, - { SN_op_Multiply, OP_MULPD, SIMD_EMIT_BINARY }, - { SN_op_Subtraction, OP_SUBPD, SIMD_EMIT_BINARY }, - { SN_set_X, 0, SIMD_EMIT_SETTER }, - { SN_set_Y, 1, SIMD_EMIT_SETTER }, + { SN_ctor, OP_EXPAND_R8, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, + { SN_AddSub, OP_ADDSUBPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY,}, + { SN_AndNot, OP_ANDNPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_CompareEqual, OP_COMPPD, 
SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_EQ }, + { SN_CompareLessEqual, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_LE }, + { SN_CompareLessThan, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_LT }, + { SN_CompareNotEqual, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NEQ }, + { SN_CompareNotLessEqual, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLE }, + { SN_CompareNotLessThan, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLT }, + { SN_CompareOrdered, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_ORD }, + { SN_CompareUnordered, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_UNORD }, + { SN_Duplicate, OP_DUPPD, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY }, + { SN_HorizontalAdd, OP_HADDPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY }, + { SN_HorizontalSub, OP_HSUBPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY }, + { SN_InterleaveHigh, OP_UNPACK_HIGHPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_InterleaveLow, OP_UNPACK_LOWPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED }, + { SN_Max, OP_MAXPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_Min, OP_MINPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 }, + { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 }, + { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 }, + { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA }, + { SN_Sqrt, OP_SQRTPD, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY }, + { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE }, + { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD }, + { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD }, + { SN_op_Addition, OP_ADDPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { 
SN_op_BitwiseAnd, OP_ANDPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseOr, OP_ORPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Division, OP_DIVPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_ExclusiveOr, OP_XORPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST }, + { SN_op_Multiply, OP_MULPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Subtraction, OP_SUBPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; static const SimdIntrinsc vector2ul_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, - { SN_CompareEqual, OP_PCMPEQQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, - { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, - { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 }, - { SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 }, - { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 }, - { SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA }, - { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE }, - { SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_EMIT_BINARY }, - { SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_EMIT_BINARY }, - { SN_get_X, 0, SIMD_EMIT_GETTER_QWORD }, - { SN_get_Y, 1, SIMD_EMIT_GETTER_QWORD }, - { SN_op_Addition, OP_PADDQ, SIMD_EMIT_BINARY }, - { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY }, - { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY }, - { SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY }, - { SN_op_Explicit, 0, SIMD_EMIT_CAST }, - { SN_op_LeftShift, OP_PSHLQ, SIMD_EMIT_SHIFT }, - { SN_op_Multiply, OP_PMULQ, SIMD_EMIT_BINARY }, - { SN_op_RightShift, OP_PSHRQ, SIMD_EMIT_SHIFT }, - { SN_op_Subtraction, OP_PSUBQ, SIMD_EMIT_BINARY }, - { SN_set_X, 0, SIMD_EMIT_SETTER }, - { SN_set_Y, 1, SIMD_EMIT_SETTER }, 
+ { SN_ctor, OP_EXPAND_I8, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, + { SN_CompareEqual, OP_PCMPEQQ, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED }, + { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 }, + { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 }, + { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 }, + { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA }, + { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE }, + { SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD }, + { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD }, + { SN_op_Addition, OP_PADDQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 }, + { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST }, + { SN_op_LeftShift, OP_PSHLQ, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_op_Multiply, OP_PMULQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_RightShift, OP_PSHRQ, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_op_Subtraction, OP_PSUBQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; static const SimdIntrinsc vector2l_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, - { SN_CompareEqual, OP_PCMPEQQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, - { SN_CompareGreaterThan, OP_PCMPGTQ, SIMD_EMIT_BINARY, SIMD_VERSION_SSE42 }, - { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, - { 
SN_LogicalRightShift, OP_PSHRQ, SIMD_EMIT_SHIFT }, - { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 }, - { SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 }, - { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 }, - { SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA }, - { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE }, - { SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_EMIT_BINARY }, - { SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_EMIT_BINARY }, - { SN_get_X, 0, SIMD_EMIT_GETTER_QWORD }, - { SN_get_Y, 1, SIMD_EMIT_GETTER_QWORD }, - { SN_op_Addition, OP_PADDQ, SIMD_EMIT_BINARY }, - { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY }, - { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY }, - { SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY }, - { SN_op_Explicit, 0, SIMD_EMIT_CAST }, - { SN_op_LeftShift, OP_PSHLQ, SIMD_EMIT_SHIFT }, - { SN_op_Multiply, OP_PMULQ, SIMD_EMIT_BINARY }, - { SN_op_Subtraction, OP_PSUBQ, SIMD_EMIT_BINARY }, - { SN_set_X, 0, SIMD_EMIT_SETTER }, - { SN_set_Y, 1, SIMD_EMIT_SETTER }, + { SN_ctor, OP_EXPAND_I8, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, + { SN_CompareEqual, OP_PCMPEQQ, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_CompareGreaterThan, OP_PCMPGTQ, SIMD_VERSION_SSE42, SIMD_EMIT_BINARY }, + { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED }, + { SN_LogicalRightShift, OP_PSHRQ, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 }, + { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 }, + { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 }, + { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA }, + { SN_StoreAligned, 
OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE }, + { SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD }, + { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD }, + { SN_op_Addition, OP_PADDQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST }, + { SN_op_LeftShift, OP_PSHLQ, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_op_Multiply, OP_PMULQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Subtraction, OP_PSUBQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; static const SimdIntrinsc vector4ui_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, - { SN_ArithmeticRightShift, OP_PSARD, SIMD_EMIT_SHIFT }, - { SN_CompareEqual, OP_PCMPEQD, SIMD_EMIT_BINARY }, - { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, - { SN_Max, OP_PMAXD_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, - { SN_Min, OP_PMIND_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, - { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 }, - { SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 }, - { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 }, - { SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA }, - { SN_Shuffle, OP_PSHUFLED, SIMD_EMIT_SHUFFLE }, - { SN_SignedPackWithSignedSaturation, OP_PACKD, SIMD_EMIT_BINARY }, - { SN_SignedPackWithUnsignedSaturation, OP_PACKD_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 
}, - { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE }, - { SN_UnpackHigh, OP_UNPACK_HIGHD, SIMD_EMIT_BINARY }, - { SN_UnpackLow, OP_UNPACK_LOWD, SIMD_EMIT_BINARY }, - { SN_get_W, 3, SIMD_EMIT_GETTER }, - { SN_get_X, 0, SIMD_EMIT_GETTER }, - { SN_get_Y, 1, SIMD_EMIT_GETTER }, - { SN_get_Z, 2, SIMD_EMIT_GETTER }, - { SN_op_Addition, OP_PADDD, SIMD_EMIT_BINARY }, - { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY }, - { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY }, - { SN_op_Equality, OP_PCMPEQD, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ }, - { SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY }, - { SN_op_Explicit, 0, SIMD_EMIT_CAST }, - { SN_op_Inequality, OP_PCMPEQD, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ }, - { SN_op_LeftShift, OP_PSHLD, SIMD_EMIT_SHIFT }, - { SN_op_Multiply, OP_PMULD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, - { SN_op_RightShift, OP_PSHRD, SIMD_EMIT_SHIFT }, - { SN_op_Subtraction, OP_PSUBD, SIMD_EMIT_BINARY }, - { SN_set_W, 3, SIMD_EMIT_SETTER }, - { SN_set_X, 0, SIMD_EMIT_SETTER }, - { SN_set_Y, 1, SIMD_EMIT_SETTER }, - { SN_set_Z, 2, SIMD_EMIT_SETTER }, + { SN_ctor, OP_EXPAND_I4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, + { SN_ArithmeticRightShift, OP_PSARD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_CompareEqual, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED }, + { SN_Max, OP_PMAXD_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_Min, OP_PMIND_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 }, + { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 }, + { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 }, + { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA }, + { SN_Shuffle, OP_PSHUFLED, SIMD_VERSION_SSE1, 
SIMD_EMIT_SHUFFLE }, + { SN_SignedPackWithSignedSaturation, OP_PACKD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_SignedPackWithUnsignedSaturation, OP_PACKD_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE }, + { SN_UnpackHigh, OP_UNPACK_HIGHD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_UnpackLow, OP_UNPACK_LOWD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_get_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_op_Addition, OP_PADDD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Equality, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ }, + { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST }, + { SN_op_Inequality, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ }, + { SN_op_LeftShift, OP_PSHLD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_op_Multiply, OP_PMULD, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_op_RightShift, OP_PSHRD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_op_Subtraction, OP_PSUBD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_set_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; static const SimdIntrinsc vector4i_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, - { SN_CompareEqual, OP_PCMPEQD, SIMD_EMIT_BINARY }, - { SN_CompareGreaterThan, OP_PCMPGTD, SIMD_EMIT_BINARY }, - { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, - { SN_LogicalRightShift, OP_PSHRD, SIMD_EMIT_SHIFT }, - { 
SN_Max, OP_PMAXD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, - { SN_Min, OP_PMIND, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, - { SN_PackWithSignedSaturation, OP_PACKD, SIMD_EMIT_BINARY }, - { SN_PackWithUnsignedSaturation, OP_PACKD_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, - { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 }, - { SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 }, - { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 }, - { SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA }, - { SN_Shuffle, OP_PSHUFLED, SIMD_EMIT_SHUFFLE }, - { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE }, - { SN_UnpackHigh, OP_UNPACK_HIGHD, SIMD_EMIT_BINARY }, - { SN_UnpackLow, OP_UNPACK_LOWD, SIMD_EMIT_BINARY }, - { SN_get_W, 3, SIMD_EMIT_GETTER }, - { SN_get_X, 0, SIMD_EMIT_GETTER }, - { SN_get_Y, 1, SIMD_EMIT_GETTER }, - { SN_get_Z, 2, SIMD_EMIT_GETTER }, - { SN_op_Addition, OP_PADDD, SIMD_EMIT_BINARY }, - { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY }, - { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY }, - { SN_op_Equality, OP_PCMPEQD, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ }, - { SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY }, - { SN_op_Explicit, 0, SIMD_EMIT_CAST }, - { SN_op_Inequality, OP_PCMPEQD, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ }, - { SN_op_LeftShift, OP_PSHLD, SIMD_EMIT_SHIFT }, - { SN_op_Multiply, OP_PMULD, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, - { SN_op_RightShift, OP_PSARD, SIMD_EMIT_SHIFT }, - { SN_op_Subtraction, OP_PSUBD, SIMD_EMIT_BINARY }, - { SN_set_W, 3, SIMD_EMIT_SETTER }, - { SN_set_X, 0, SIMD_EMIT_SETTER }, - { SN_set_Y, 1, SIMD_EMIT_SETTER }, - { SN_set_Z, 2, SIMD_EMIT_SETTER }, + { SN_ctor, OP_EXPAND_I4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, + { SN_CompareEqual, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { 
SN_CompareGreaterThan, OP_PCMPGTD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED }, + { SN_LogicalRightShift, OP_PSHRD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_Max, OP_PMAXD, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_Min, OP_PMIND, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_PackWithSignedSaturation, OP_PACKD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_PackWithUnsignedSaturation, OP_PACKD_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 }, + { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 }, + { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 }, + { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA }, + { SN_Shuffle, OP_PSHUFLED, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE }, + { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE }, + { SN_UnpackHigh, OP_UNPACK_HIGHD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_UnpackLow, OP_UNPACK_LOWD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_get_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_op_Addition, OP_PADDD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Equality, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ }, + { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST }, + { SN_op_Inequality, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ }, + { SN_op_LeftShift, OP_PSHLD, 
SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_op_Multiply, OP_PMULD, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_op_RightShift, OP_PSARD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_op_Subtraction, OP_PSUBD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_set_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; static const SimdIntrinsc vector8us_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, - { SN_AddWithSaturation, OP_PADDW_SAT_UN, SIMD_EMIT_BINARY }, - { SN_ArithmeticRightShift, OP_PSARW, SIMD_EMIT_SHIFT }, - { SN_Average, OP_PAVGW_UN, SIMD_EMIT_BINARY }, - { SN_CompareEqual, OP_PCMPEQW, SIMD_EMIT_BINARY }, - { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, - { SN_Max, OP_PMAXW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, - { SN_Min, OP_PMINW_UN, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, - { SN_MultiplyStoreHigh, OP_PMULW_HIGH_UN, SIMD_EMIT_BINARY }, - { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 }, - { SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 }, - { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 }, - { SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA }, - { SN_ShuffleHigh, OP_PSHUFLEW_HIGH, SIMD_EMIT_SHUFFLE }, - { SN_ShuffleLow, OP_PSHUFLEW_LOW, SIMD_EMIT_SHUFFLE }, - { SN_SignedPackWithSignedSaturation, OP_PACKW, SIMD_EMIT_BINARY }, - { SN_SignedPackWithUnsignedSaturation, OP_PACKW_UN, SIMD_EMIT_BINARY }, - { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE }, - { SN_SubtractWithSaturation, OP_PSUBW_SAT_UN, SIMD_EMIT_BINARY }, - { SN_UnpackHigh, OP_UNPACK_HIGHW, SIMD_EMIT_BINARY }, - { SN_UnpackLow, OP_UNPACK_LOWW, SIMD_EMIT_BINARY }, - { SN_get_V0, 0, SIMD_EMIT_GETTER }, - { SN_get_V1, 
1, SIMD_EMIT_GETTER }, - { SN_get_V2, 2, SIMD_EMIT_GETTER }, - { SN_get_V3, 3, SIMD_EMIT_GETTER }, - { SN_get_V4, 4, SIMD_EMIT_GETTER }, - { SN_get_V5, 5, SIMD_EMIT_GETTER }, - { SN_get_V6, 6, SIMD_EMIT_GETTER }, - { SN_get_V7, 7, SIMD_EMIT_GETTER }, - { SN_op_Addition, OP_PADDW, SIMD_EMIT_BINARY }, - { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY }, - { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY }, - { SN_op_Equality, OP_PCMPEQW, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ }, - { SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY }, - { SN_op_Explicit, 0, SIMD_EMIT_CAST }, - { SN_op_Inequality, OP_PCMPEQW, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ }, - { SN_op_LeftShift, OP_PSHLW, SIMD_EMIT_SHIFT }, - { SN_op_Multiply, OP_PMULW, SIMD_EMIT_BINARY }, - { SN_op_RightShift, OP_PSHRW, SIMD_EMIT_SHIFT }, - { SN_op_Subtraction, OP_PSUBW, SIMD_EMIT_BINARY }, - { SN_set_V0, 0, SIMD_EMIT_SETTER }, - { SN_set_V1, 1, SIMD_EMIT_SETTER }, - { SN_set_V2, 2, SIMD_EMIT_SETTER }, - { SN_set_V3, 3, SIMD_EMIT_SETTER }, - { SN_set_V4, 4, SIMD_EMIT_SETTER }, - { SN_set_V5, 5, SIMD_EMIT_SETTER }, - { SN_set_V6, 6, SIMD_EMIT_SETTER }, - { SN_set_V7, 7, SIMD_EMIT_SETTER }, + { SN_ctor, OP_EXPAND_I2, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, + { SN_AddWithSaturation, OP_PADDW_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_ArithmeticRightShift, OP_PSARW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_Average, OP_PAVGW_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_CompareEqual, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_VERSION_SSE1 }, + { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED }, + { SN_Max, OP_PMAXW_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_Min, OP_PMINW_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_MultiplyStoreHigh, OP_PMULW_HIGH_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 }, + { SN_PrefetchTemporal1stLevelCache, 0, 
SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 }, + { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 }, + { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA }, + { SN_ShuffleHigh, OP_PSHUFLEW_HIGH, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE }, + { SN_ShuffleLow, OP_PSHUFLEW_LOW, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE }, + { SN_SignedPackWithSignedSaturation, OP_PACKW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_SignedPackWithUnsignedSaturation, OP_PACKW_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE }, + { SN_SubtractWithSaturation, OP_PSUBW_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_UnpackHigh, OP_UNPACK_HIGHW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_UnpackLow, OP_UNPACK_LOWW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_get_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_op_Addition, OP_PADDW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Equality, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ }, + { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST }, + { SN_op_Inequality, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ }, + { SN_op_LeftShift, OP_PSHLW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_op_Multiply, OP_PMULW, 
SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_RightShift, OP_PSHRW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_op_Subtraction, OP_PSUBW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_set_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; static const SimdIntrinsc vector8s_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, - { SN_AddWithSaturation, OP_PADDW_SAT, SIMD_EMIT_BINARY }, - { SN_CompareEqual, OP_PCMPEQW, SIMD_EMIT_BINARY }, - { SN_CompareGreaterThan, OP_PCMPGTW, SIMD_EMIT_BINARY }, - { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, - { SN_LogicalRightShift, OP_PSHRW, SIMD_EMIT_SHIFT }, - { SN_Max, OP_PMAXW, SIMD_EMIT_BINARY }, - { SN_Min, OP_PMINW, SIMD_EMIT_BINARY }, - { SN_MultiplyStoreHigh, OP_PMULW_HIGH, SIMD_EMIT_BINARY }, - { SN_PackWithSignedSaturation, OP_PACKW, SIMD_EMIT_BINARY }, - { SN_PackWithUnsignedSaturation, OP_PACKW_UN, SIMD_EMIT_BINARY }, - { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 }, - { SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 }, - { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 }, - { SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA }, - { SN_ShuffleHigh, OP_PSHUFLEW_HIGH, SIMD_EMIT_SHUFFLE }, - { SN_ShuffleLow, OP_PSHUFLEW_LOW, SIMD_EMIT_SHUFFLE }, - { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE }, - { SN_SubtractWithSaturation, OP_PSUBW_SAT_UN, SIMD_EMIT_BINARY }, - { SN_UnpackHigh, OP_UNPACK_HIGHW, SIMD_EMIT_BINARY }, - 
{ SN_UnpackLow, OP_UNPACK_LOWW, SIMD_EMIT_BINARY }, - { SN_get_V0, 0, SIMD_EMIT_GETTER }, - { SN_get_V1, 1, SIMD_EMIT_GETTER }, - { SN_get_V2, 2, SIMD_EMIT_GETTER }, - { SN_get_V3, 3, SIMD_EMIT_GETTER }, - { SN_get_V4, 4, SIMD_EMIT_GETTER }, - { SN_get_V5, 5, SIMD_EMIT_GETTER }, - { SN_get_V6, 6, SIMD_EMIT_GETTER }, - { SN_get_V7, 7, SIMD_EMIT_GETTER }, - { SN_op_Addition, OP_PADDW, SIMD_EMIT_BINARY }, - { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY }, - { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY }, - { SN_op_Equality, OP_PCMPEQW, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ }, - { SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY }, - { SN_op_Explicit, 0, SIMD_EMIT_CAST }, - { SN_op_Inequality, OP_PCMPEQW, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ }, - { SN_op_LeftShift, OP_PSHLW, SIMD_EMIT_SHIFT }, - { SN_op_Multiply, OP_PMULW, SIMD_EMIT_BINARY }, - { SN_op_RightShift, OP_PSARW, SIMD_EMIT_SHIFT }, - { SN_op_Subtraction, OP_PSUBW, SIMD_EMIT_BINARY }, - { SN_set_V0, 0, SIMD_EMIT_SETTER }, - { SN_set_V1, 1, SIMD_EMIT_SETTER }, - { SN_set_V2, 2, SIMD_EMIT_SETTER }, - { SN_set_V3, 3, SIMD_EMIT_SETTER }, - { SN_set_V4, 4, SIMD_EMIT_SETTER }, - { SN_set_V5, 5, SIMD_EMIT_SETTER }, - { SN_set_V6, 6, SIMD_EMIT_SETTER }, - { SN_set_V7, 7, SIMD_EMIT_SETTER }, + { SN_ctor, OP_EXPAND_I2, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, + { SN_AddWithSaturation, OP_PADDW_SAT, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_CompareEqual, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_CompareGreaterThan, OP_PCMPGTW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED }, + { SN_LogicalRightShift, OP_PSHRW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_Max, OP_PMAXW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_Min, OP_PMINW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_MultiplyStoreHigh, OP_PMULW_HIGH, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_PackWithSignedSaturation, OP_PACKW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY 
}, + { SN_PackWithUnsignedSaturation, OP_PACKW_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 }, + { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 }, + { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 }, + { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA }, + { SN_ShuffleHigh, OP_PSHUFLEW_HIGH, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE }, + { SN_ShuffleLow, OP_PSHUFLEW_LOW, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE }, + { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE }, + { SN_SubtractWithSaturation, OP_PSUBW_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_UnpackHigh, OP_UNPACK_HIGHW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_UnpackLow, OP_UNPACK_LOWW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_get_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_op_Addition, OP_PADDW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Equality, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ }, + { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST }, + { SN_op_Inequality, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ }, + { SN_op_LeftShift, OP_PSHLW, SIMD_VERSION_SSE1, 
SIMD_EMIT_SHIFT }, + { SN_op_Multiply, OP_PMULW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_RightShift, OP_PSARW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT }, + { SN_op_Subtraction, OP_PSUBW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_set_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; static const SimdIntrinsc vector16b_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, - { SN_AddWithSaturation, OP_PADDB_SAT_UN, SIMD_EMIT_BINARY }, - { SN_Average, OP_PAVGB_UN, SIMD_EMIT_BINARY }, - { SN_CompareEqual, OP_PCMPEQB, SIMD_EMIT_BINARY }, - { SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK }, - { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, - { SN_Max, OP_PMAXB_UN, SIMD_EMIT_BINARY }, - { SN_Min, OP_PMINB_UN, SIMD_EMIT_BINARY }, - { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 }, - { SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 }, - { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 }, - { SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA }, - { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE }, - { SN_SubtractWithSaturation, OP_PSUBB_SAT_UN, SIMD_EMIT_BINARY }, - { SN_SumOfAbsoluteDifferences, OP_PSUM_ABS_DIFF, SIMD_EMIT_BINARY }, - { SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_EMIT_BINARY }, - { SN_UnpackLow, OP_UNPACK_LOWB, SIMD_EMIT_BINARY }, - { SN_get_V0, 0, SIMD_EMIT_GETTER }, - { SN_get_V1, 1, SIMD_EMIT_GETTER }, - { SN_get_V10, 10, SIMD_EMIT_GETTER }, - { SN_get_V11, 11, 
SIMD_EMIT_GETTER }, - { SN_get_V12, 12, SIMD_EMIT_GETTER }, - { SN_get_V13, 13, SIMD_EMIT_GETTER }, - { SN_get_V14, 14, SIMD_EMIT_GETTER }, - { SN_get_V15, 15, SIMD_EMIT_GETTER }, - { SN_get_V2, 2, SIMD_EMIT_GETTER }, - { SN_get_V3, 3, SIMD_EMIT_GETTER }, - { SN_get_V4, 4, SIMD_EMIT_GETTER }, - { SN_get_V5, 5, SIMD_EMIT_GETTER }, - { SN_get_V6, 6, SIMD_EMIT_GETTER }, - { SN_get_V7, 7, SIMD_EMIT_GETTER }, - { SN_get_V8, 8, SIMD_EMIT_GETTER }, - { SN_get_V9, 9, SIMD_EMIT_GETTER }, - { SN_op_Addition, OP_PADDB, SIMD_EMIT_BINARY }, - { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY }, - { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY }, - { SN_op_Equality, OP_PCMPEQB, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ }, - { SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY }, - { SN_op_Explicit, 0, SIMD_EMIT_CAST }, - { SN_op_Inequality, OP_PCMPEQB, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ }, - { SN_op_Subtraction, OP_PSUBB, SIMD_EMIT_BINARY }, - { SN_set_V0, 0, SIMD_EMIT_SETTER }, - { SN_set_V1, 1, SIMD_EMIT_SETTER }, - { SN_set_V10, 10, SIMD_EMIT_SETTER }, - { SN_set_V11, 11, SIMD_EMIT_SETTER }, - { SN_set_V12, 12, SIMD_EMIT_SETTER }, - { SN_set_V13, 13, SIMD_EMIT_SETTER }, - { SN_set_V14, 14, SIMD_EMIT_SETTER }, - { SN_set_V15, 15, SIMD_EMIT_SETTER }, - { SN_set_V2, 2, SIMD_EMIT_SETTER }, - { SN_set_V3, 3, SIMD_EMIT_SETTER }, - { SN_set_V4, 4, SIMD_EMIT_SETTER }, - { SN_set_V5, 5, SIMD_EMIT_SETTER }, - { SN_set_V6, 6, SIMD_EMIT_SETTER }, - { SN_set_V7, 7, SIMD_EMIT_SETTER }, - { SN_set_V8, 8, SIMD_EMIT_SETTER }, - { SN_set_V9, 9, SIMD_EMIT_SETTER }, + { SN_ctor, OP_EXPAND_I1, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, + { SN_AddWithSaturation, OP_PADDB_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_Average, OP_PAVGB_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_CompareEqual, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_ExtractByteMask, 0, SIMD_VERSION_SSE1, SIMD_EMIT_EXTRACT_MASK }, + { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED 
}, + { SN_Max, OP_PMAXB_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_Min, OP_PMINB_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 }, + { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 }, + { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 }, + { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA }, + { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE }, + { SN_SubtractWithSaturation, OP_PSUBB_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_SumOfAbsoluteDifferences, OP_PSUM_ABS_DIFF, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_UnpackLow, OP_UNPACK_LOWB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_get_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V10, 10, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V11, 11, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V12, 12, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V13, 13, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V14, 14, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V15, 15, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V8, 8, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V9, 9, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_op_Addition, OP_PADDB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, 
SIMD_EMIT_BINARY }, + { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Equality, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ }, + { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST }, + { SN_op_Inequality, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ }, + { SN_op_Subtraction, OP_PSUBB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_set_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V10, 10, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V11, 11, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V12, 12, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V13, 13, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V14, 14, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V15, 15, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V8, 8, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V9, 9, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; /* @@ -513,62 +514,62 @@ Missing: setters */ static const SimdIntrinsc vector16sb_intrinsics[] = { - { SN_ctor, 0, SIMD_EMIT_CTOR }, - { SN_AddWithSaturation, OP_PADDB_SAT, SIMD_EMIT_BINARY }, - { SN_CompareEqual, OP_PCMPEQB, SIMD_EMIT_BINARY }, - { SN_CompareGreaterThan, OP_PCMPGTB, SIMD_EMIT_BINARY }, - { SN_ExtractByteMask, 0, SIMD_EMIT_EXTRACT_MASK }, - { SN_LoadAligned, 0, SIMD_EMIT_LOAD_ALIGNED }, - { SN_Max, OP_PMAXB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, - { SN_Min, OP_PMINB, SIMD_EMIT_BINARY, SIMD_VERSION_SSE41 }, - { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_EMIT_PREFETCH, 
SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_0 }, - { SN_PrefetchTemporal1stLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_1 }, - { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_2 }, - { SN_PrefetchNonTemporal, 0, SIMD_EMIT_PREFETCH, SIMD_VERSION_SSE1, SIMD_PREFETCH_MODE_NTA }, - { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_EMIT_STORE }, - { SN_SubtractWithSaturation, OP_PSUBB_SAT, SIMD_EMIT_BINARY }, - { SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_EMIT_BINARY }, - { SN_UnpackLow, OP_UNPACK_LOWB, SIMD_EMIT_BINARY }, - { SN_get_V0, 0, SIMD_EMIT_GETTER }, - { SN_get_V1, 1, SIMD_EMIT_GETTER }, - { SN_get_V10, 10, SIMD_EMIT_GETTER }, - { SN_get_V11, 11, SIMD_EMIT_GETTER }, - { SN_get_V12, 12, SIMD_EMIT_GETTER }, - { SN_get_V13, 13, SIMD_EMIT_GETTER }, - { SN_get_V14, 14, SIMD_EMIT_GETTER }, - { SN_get_V15, 15, SIMD_EMIT_GETTER }, - { SN_get_V2, 2, SIMD_EMIT_GETTER }, - { SN_get_V3, 3, SIMD_EMIT_GETTER }, - { SN_get_V4, 4, SIMD_EMIT_GETTER }, - { SN_get_V5, 5, SIMD_EMIT_GETTER }, - { SN_get_V6, 6, SIMD_EMIT_GETTER }, - { SN_get_V7, 7, SIMD_EMIT_GETTER }, - { SN_get_V8, 8, SIMD_EMIT_GETTER }, - { SN_get_V9, 9, SIMD_EMIT_GETTER }, - { SN_op_Addition, OP_PADDB, SIMD_EMIT_BINARY }, - { SN_op_BitwiseAnd, OP_PAND, SIMD_EMIT_BINARY }, - { SN_op_BitwiseOr, OP_POR, SIMD_EMIT_BINARY }, - { SN_op_Equality, OP_PCMPEQB, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_EQ }, - { SN_op_ExclusiveOr, OP_PXOR, SIMD_EMIT_BINARY }, - { SN_op_Explicit, 0, SIMD_EMIT_CAST }, - { SN_op_Inequality, OP_PCMPEQB, SIMD_EMIT_EQUALITY, SIMD_VERSION_SSE1, SIMD_COMP_NEQ }, - { SN_op_Subtraction, OP_PSUBB, SIMD_EMIT_BINARY }, - { SN_set_V0, 0, SIMD_EMIT_SETTER }, - { SN_set_V1, 1, SIMD_EMIT_SETTER }, - { SN_set_V10, 10, SIMD_EMIT_SETTER }, - { SN_set_V11, 11, SIMD_EMIT_SETTER }, - { SN_set_V12, 12, SIMD_EMIT_SETTER }, - { SN_set_V13, 13, SIMD_EMIT_SETTER }, - { SN_set_V14, 14, SIMD_EMIT_SETTER }, - { SN_set_V15, 15, SIMD_EMIT_SETTER 
}, - { SN_set_V2, 2, SIMD_EMIT_SETTER }, - { SN_set_V3, 3, SIMD_EMIT_SETTER }, - { SN_set_V4, 4, SIMD_EMIT_SETTER }, - { SN_set_V5, 5, SIMD_EMIT_SETTER }, - { SN_set_V6, 6, SIMD_EMIT_SETTER }, - { SN_set_V7, 7, SIMD_EMIT_SETTER }, - { SN_set_V8, 8, SIMD_EMIT_SETTER }, - { SN_set_V9, 9, SIMD_EMIT_SETTER }, + { SN_ctor, OP_EXPAND_I1, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR }, + { SN_AddWithSaturation, OP_PADDB_SAT, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_CompareEqual, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_CompareGreaterThan, OP_PCMPGTB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_ExtractByteMask, 0, SIMD_VERSION_SSE1, SIMD_EMIT_EXTRACT_MASK }, + { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED }, + { SN_Max, OP_PMAXB, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_Min, OP_PMINB, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY }, + { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 }, + { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 }, + { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 }, + { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA }, + { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE }, + { SN_SubtractWithSaturation, OP_PSUBB_SAT, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_UnpackLow, OP_UNPACK_LOWB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_get_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V10, 10, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V11, 11, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V12, 12, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V13, 13, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V14, 14, SIMD_VERSION_SSE1, 
SIMD_EMIT_GETTER }, + { SN_get_V15, 15, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V8, 8, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_get_V9, 9, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER }, + { SN_op_Addition, OP_PADDB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Equality, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ }, + { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST }, + { SN_op_Inequality, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ }, + { SN_op_Subtraction, OP_PSUBB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY }, + { SN_set_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V10, 10, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V11, 11, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V12, 12, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V13, 13, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V14, 14, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V15, 15, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { SN_set_V8, 8, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, + { 
SN_set_V9, 9, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }, }; static guint32 simd_supported_versions; @@ -676,6 +677,9 @@ mono_simd_simplify_indirection (MonoCompile *cfg) /*Scan the first basic block looking xzeros not used*/ for (ins = first_bb->code; ins; ins = ins->next) { + int num_sregs; + int sregs [MONO_MAX_SRC_REGS]; + if (ins->opcode == OP_XZERO) { if (!(vreg_flags [ins->dreg] & VREG_HAS_OTHER_OP_BB0)) { DEBUG (printf ("[simd-simplify] R%d has vzero: ", ins->dreg); mono_print_ins(ins)); @@ -685,13 +689,13 @@ mono_simd_simplify_indirection (MonoCompile *cfg) } if (ins->opcode == OP_LDADDR && apply_vreg_first_block_interference (cfg, ins, ((MonoInst*)ins->inst_p0)->dreg, max_vreg, vreg_flags)) continue; - if (apply_vreg_first_block_interference (cfg, ins, ins->dreg, max_vreg, vreg_flags)) continue; - if (apply_vreg_first_block_interference (cfg, ins, ins->sreg1, max_vreg, vreg_flags)) - continue; - if (apply_vreg_first_block_interference (cfg, ins, ins->sreg2, max_vreg, vreg_flags)) - continue; + num_sregs = mono_inst_get_src_registers (ins, sregs); + for (i = 0; i < num_sregs; ++i) { + if (apply_vreg_first_block_interference (cfg, ins, sregs [i], max_vreg, vreg_flags)) + break; + } } if (IS_DEBUG_ON (cfg)) { @@ -721,15 +725,19 @@ mono_simd_simplify_indirection (MonoCompile *cfg) for (bb = first_bb->next_bb; bb; bb = bb->next_bb) { for (ins = bb->code; ins; ins = ins->next) { - + int num_sregs; + int sregs [MONO_MAX_SRC_REGS]; + if (ins->opcode == OP_LDADDR && apply_vreg_following_block_interference (cfg, ins, ((MonoInst*)ins->inst_p0)->dreg, bb, max_vreg, vreg_flags, target_bb)) continue; if (apply_vreg_following_block_interference (cfg, ins, ins->dreg, bb, max_vreg, vreg_flags, target_bb)) continue; - if (apply_vreg_following_block_interference (cfg, ins, ins->sreg1, bb, max_vreg, vreg_flags, target_bb)) - continue; - if (apply_vreg_following_block_interference (cfg, ins, ins->sreg2, bb, max_vreg, vreg_flags, target_bb)) - continue; + num_sregs = 
mono_inst_get_src_registers (ins, sregs); + for (i = 0; i < num_sregs; ++i) { + if (apply_vreg_following_block_interference (cfg, ins, sregs [i], bb, + max_vreg, vreg_flags, target_bb)) + continue; + } } } @@ -745,10 +753,19 @@ mono_simd_simplify_indirection (MonoCompile *cfg) if (!(vreg_flags [var->dreg] & VREG_SINGLE_BB_USE)) continue; for (ins = target_bb [var->dreg]->code; ins; ins = ins->next) { + int num_sregs, j; + int sregs [MONO_MAX_SRC_REGS]; + gboolean found = FALSE; + + num_sregs = mono_inst_get_src_registers (ins, sregs); + for (j = 0; j < num_sregs; ++j) { /*Does ins read var->dreg through any of its source registers?*/ + if (sregs [j] == var->dreg) + found = TRUE; + } /*We can avoid inserting the XZERO if the first use doesn't depend on the zero'ed value.*/ - if (ins->dreg == var->dreg && ins->sreg1 != var->dreg && ins->sreg2 != var->dreg) { + if (ins->dreg == var->dreg && !found) { break; - } else if (ins->sreg1 == var->dreg || ins->sreg2 == var->dreg) { + } else if (found) { MonoInst *tmp; MONO_INST_NEW (cfg, tmp, OP_XZERO); tmp->dreg = var->dreg; @@ -789,8 +806,10 @@ get_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src) * This function will load the value if needed. 
*/ static int -load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src) +load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean *indirect) /* indirect: optional (may be NULL) out flag, cleared on entry and set to TRUE in the STACK_PTR/STACK_MP branch that emits OP_LOADX_MEMBASE */ { + if (indirect) + *indirect = FALSE; if (src->opcode == OP_XMOVE) { return src->sreg1; } else if (src->opcode == OP_LDADDR) { @@ -801,6 +820,8 @@ load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src) return src->dreg; } else if (src->type == STACK_PTR || src->type == STACK_MP) { MonoInst *ins; + if (indirect) + *indirect = TRUE; MONO_INST_NEW (cfg, ins, OP_LOADX_MEMBASE); ins->klass = cmethod->klass; @@ -845,22 +866,74 @@ get_simd_ctor_spill_area (MonoCompile *cfg, MonoClass *avector_klass) return cfg->simd_ctor_var; } +static int +mono_type_to_expand_op (MonoType *type) /* Returns the OP_EXPAND_* opcode for type->type; signed and unsigned integers of equal width share one opcode, any other type reaches g_assert_not_reached */ +{ + switch (type->type) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_EXPAND_I1; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_EXPAND_I2; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_EXPAND_I4; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return OP_EXPAND_I8; + case MONO_TYPE_R4: + return OP_EXPAND_R4; + case MONO_TYPE_R8: + return OP_EXPAND_R8; + } + g_assert_not_reached (); +} + +static int +get_simd_vreg_or_expanded_scalar (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, int position) /* Returns a vreg for argument `position` (0 or 1) of a two-parameter method: the result of get_simd_vreg when that parameter's class is a simd_type, otherwise the dreg of a newly emitted OP_EXPAND_* instruction that reads src->dreg */ +{ + MonoInst *ins; + MonoMethodSignature *sig = mono_method_signature (cmethod); + int expand_op; + + g_assert (sig->param_count == 2); + g_assert (position == 0 || position == 1); + + if (mono_class_from_mono_type (sig->params [position])->simd_type) + return get_simd_vreg (cfg, cmethod, src); + + expand_op = mono_type_to_expand_op (sig->params [position]); + MONO_INST_NEW (cfg, ins, expand_op); + ins->klass = cmethod->klass; + ins->sreg1 = src->dreg; + ins->type = STACK_VTYPE; + ins->dreg = alloc_ireg (cfg); + MONO_ADD_INS (cfg->cbb, ins); + + if (expand_op == OP_EXPAND_R4) /* R4 and R8 expansions additionally get a spill variable */ + ins->backend.spill_var = get_int_to_float_spill_area (cfg); + else if (expand_op == OP_EXPAND_R8) + 
ins->backend.spill_var = get_double_spill_area (cfg); + + return ins->dreg; +} + static MonoInst* simd_intrinsic_emit_binary (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args) { MonoInst* ins; int left_vreg, right_vreg; - left_vreg = get_simd_vreg (cfg, cmethod, args [0]); - right_vreg = get_simd_vreg (cfg, cmethod, args [1]); - + left_vreg = get_simd_vreg_or_expanded_scalar (cfg, cmethod, args [0], 0); /* each operand is resolved against its own parameter type, so either side may be a scalar that gets expanded */ + right_vreg = get_simd_vreg_or_expanded_scalar (cfg, cmethod, args [1], 1); + MONO_INST_NEW (cfg, ins, intrinsic->opcode); ins->klass = cmethod->klass; ins->sreg1 = left_vreg; ins->sreg2 = right_vreg; ins->type = STACK_VTYPE; - ins->klass = cmethod->klass; ins->dreg = alloc_ireg (cfg); ins->inst_c0 = intrinsic->flags; MONO_ADD_INS (cfg->cbb, ins); @@ -953,13 +1026,16 @@ simd_intrinsic_emit_setter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mon MonoInst *ins; MonoMethodSignature *sig = mono_method_signature (cmethod); int size, align; + gboolean indirect; /* filled in by load_simd_vreg below */ + int dreg; + size = mono_type_size (sig->params [0], &align); if (size == 2 || size == 4 || size == 8) { MONO_INST_NEW (cfg, ins, mono_type_to_slow_insert_op (sig->params [0])); ins->klass = cmethod->klass; /*This is a partial load so we encode the dependency on the previous value by setting dreg and sreg1 to the same value.*/ - ins->dreg = ins->sreg1 = load_simd_vreg (cfg, cmethod, args [0]); + ins->dreg = ins->sreg1 = dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect); ins->sreg2 = args [1]->dreg; ins->inst_c0 = intrinsic->opcode; if (sig->params [0]->type == MONO_TYPE_R4) @@ -972,7 +1048,7 @@ simd_intrinsic_emit_setter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mon MONO_INST_NEW (cfg, ins, OP_EXTRACTX_U2); ins->klass = cmethod->klass; - ins->sreg1 = sreg = load_simd_vreg (cfg, cmethod, args [0]); + ins->sreg1 = sreg = dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect); ins->type = STACK_I4; ins->dreg = vreg = alloc_ireg (cfg); ins->inst_c0 = 
intrinsic->opcode / 2; @@ -985,7 +1061,14 @@ simd_intrinsic_emit_setter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mon ins->dreg = sreg; ins->inst_c0 = intrinsic->opcode; MONO_ADD_INS (cfg->cbb, ins); + } + if (indirect) { + MONO_INST_NEW (cfg, ins, OP_STOREX_MEMBASE); + ins->klass = cmethod->klass; + ins->dreg = args [0]->dreg; + ins->sreg1 = dreg; + MONO_ADD_INS (cfg->cbb, ins); } return ins; } @@ -997,9 +1080,9 @@ simd_intrinsic_emit_getter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mon MonoMethodSignature *sig = mono_method_signature (cmethod); int vreg, shift_bits = mono_type_elements_shift_bits (sig->ret); - vreg = load_simd_vreg (cfg, cmethod, args [0]); + vreg = load_simd_vreg (cfg, cmethod, args [0], NULL); - if (intrinsic->opcode >> shift_bits) { + if ((intrinsic->opcode >> shift_bits) && !cfg->compile_llvm) { MONO_INST_NEW (cfg, ins, OP_PSHUFLED); ins->klass = cmethod->klass; ins->sreg1 = vreg; @@ -1014,7 +1097,10 @@ simd_intrinsic_emit_getter (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mon ins->sreg1 = vreg; ins->type = STACK_I4; ins->dreg = vreg = alloc_ireg (cfg); - ins->inst_c0 = intrinsic->opcode & ((1 << shift_bits) - 1); + if (cfg->compile_llvm) + ins->inst_c0 = intrinsic->opcode; + else + ins->inst_c0 = intrinsic->opcode & ((1 << shift_bits) - 1); MONO_ADD_INS (cfg->cbb, ins); if (sig->ret->type == MONO_TYPE_R4) { @@ -1036,7 +1122,7 @@ simd_intrinsic_emit_long_getter (const SimdIntrinsc *intrinsic, MonoCompile *cfg int vreg; gboolean is_r8 = mono_method_signature (cmethod)->ret->type == MONO_TYPE_R8; - vreg = load_simd_vreg (cfg, cmethod, args [0]); + vreg = load_simd_vreg (cfg, cmethod, args [0], NULL); MONO_INST_NEW (cfg, ins, is_r8 ? 
OP_EXTRACT_R8 : OP_EXTRACT_I8); ins->klass = cmethod->klass; @@ -1065,6 +1151,38 @@ simd_intrinsic_emit_ctor (const SimdIntrinsc *intrinsic, MonoCompile *cfg, MonoM int store_op = mono_type_to_store_membase (cfg, sig->params [0]); int arg_size = mono_type_size (sig->params [0], &i); + if (sig->param_count == 1) { + int dreg; + + if (is_ldaddr) { + dreg = args [0]->inst_i0->dreg; + NULLIFY_INS (args [0]); + } else { + g_assert (args [0]->type == STACK_MP || args [0]->type == STACK_PTR); + dreg = alloc_ireg (cfg); + } + + MONO_INST_NEW (cfg, ins, intrinsic->opcode); + ins->klass = cmethod->klass; + ins->sreg1 = args [1]->dreg; + ins->type = STACK_VTYPE; + ins->dreg = dreg; + + MONO_ADD_INS (cfg->cbb, ins); + if (sig->params [0]->type == MONO_TYPE_R4) + ins->backend.spill_var = get_int_to_float_spill_area (cfg); + else if (sig->params [0]->type == MONO_TYPE_R8) + ins->backend.spill_var = get_double_spill_area (cfg); + + if (!is_ldaddr) { + MONO_INST_NEW (cfg, ins, OP_STOREX_MEMBASE); + ins->dreg = args [0]->dreg; + ins->sreg1 = dreg; + MONO_ADD_INS (cfg->cbb, ins); + } + return ins; + } + if (is_ldaddr) { NEW_VARLOADA (cfg, ins, get_simd_ctor_spill_area (cfg, cmethod->klass), &cmethod->klass->byref_arg); MONO_ADD_INS (cfg->cbb, ins); @@ -1201,8 +1319,8 @@ simd_intrinsic_emit_shuffle (const SimdIntrinsc *intrinsic, MonoCompile *cfg, Mo /*TODO Exposing shuffle is not a good thing as it's non obvious. 
We should come up with better abstractions*/ if (args [1]->opcode != OP_ICONST) { - g_warning ("Shuffle with non literals is not yet supported"); - g_assert_not_reached (); + /*TODO Shuffle with non literals is not yet supported */ + return NULL; } vreg = get_simd_vreg (cfg, cmethod, args [0]); NULLIFY_INS (args [1]); @@ -1318,9 +1436,16 @@ emit_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi mono_print_ins (args [i]); } } - if (result->simd_version && !((1 << result->simd_version) & simd_supported_versions)) { - if (IS_DEBUG_ON (cfg)) - printf ("function %s::%s/%d requires unsuported SIMD instruction set %s \n", cmethod->klass->name, cmethod->name, fsig->param_count, simd_version_name (result->simd_version)); + if (result->simd_version_flags && !(result->simd_version_flags & simd_supported_versions)) { + if (IS_DEBUG_ON (cfg)) { + int x; + printf ("function %s::%s/%d requires one of unsuported SIMD instruction set(s): ", cmethod->klass->name, cmethod->name, fsig->param_count); + for (x = 1; x <= SIMD_VERSION_INDEX_END; x++) + if (result->simd_version_flags & (1 << x)) + printf ("%s ", simd_version_name (1 << x)); + + printf ("\n"); + } return NULL; } @@ -1452,37 +1577,45 @@ emit_simd_runtime_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodS MonoInst* mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) { + const char *class_name; + if (strcmp ("Mono.Simd", cmethod->klass->name_space)) return NULL; - - if (!strcmp ("SimdRuntime", cmethod->klass->name)) + + class_name = cmethod->klass->name; + if (!strcmp ("SimdRuntime", class_name)) return emit_simd_runtime_intrinsics (cfg, cmethod, fsig, args); - if (!strcmp ("ArrayExtensions", cmethod->klass->name)) + if (!strcmp ("ArrayExtensions", class_name)) return emit_array_extension_intrinsics (cfg, cmethod, fsig, args); - if (!cmethod->klass->simd_type) + if (!strcmp ("VectorOperations", class_name)) { + if (!(cmethod->flags 
& METHOD_ATTRIBUTE_STATIC)) + return NULL; + class_name = mono_class_from_mono_type (mono_method_signature (cmethod)->params [0])->name; + } else if (!cmethod->klass->simd_type) return NULL; + cfg->uses_simd_intrinsics = 1; - if (!strcmp ("Vector2d", cmethod->klass->name)) + if (!strcmp ("Vector2d", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector2d_intrinsics, sizeof (vector2d_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector4f", cmethod->klass->name)) + if (!strcmp ("Vector4f", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector4f_intrinsics, sizeof (vector4f_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector2ul", cmethod->klass->name)) + if (!strcmp ("Vector2ul", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector2ul_intrinsics, sizeof (vector2ul_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector2l", cmethod->klass->name)) + if (!strcmp ("Vector2l", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector2l_intrinsics, sizeof (vector2l_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector4ui", cmethod->klass->name)) + if (!strcmp ("Vector4ui", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector4ui_intrinsics, sizeof (vector4ui_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector4i", cmethod->klass->name)) + if (!strcmp ("Vector4i", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector4i_intrinsics, sizeof (vector4i_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector8us", cmethod->klass->name)) + if (!strcmp ("Vector8us", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector8us_intrinsics, sizeof (vector8us_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector8s", cmethod->klass->name)) + if (!strcmp ("Vector8s", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector8s_intrinsics, sizeof (vector8s_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector16b", 
cmethod->klass->name)) + if (!strcmp ("Vector16b", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector16b_intrinsics, sizeof (vector16b_intrinsics) / sizeof (SimdIntrinsc)); - if (!strcmp ("Vector16sb", cmethod->klass->name)) + if (!strcmp ("Vector16sb", class_name)) return emit_intrinsics (cfg, cmethod, fsig, args, vector16sb_intrinsics, sizeof (vector16sb_intrinsics) / sizeof (SimdIntrinsc)); return NULL;