X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mcs%2Fclass%2FMono.Simd%2FMono.Simd%2FVectorOperations.cs;h=0886b075a884183d1c7967f6701cc8c1585e838c;hb=b59807ecfab572c43c53918d85d2ad2f8db17432;hp=2848cbc3816de98b5472a69befb9f53cddc76fb3;hpb=7df323d06f757aac477d62990ac3080fbac1078a;p=mono.git diff --git a/mcs/class/Mono.Simd/Mono.Simd/VectorOperations.cs b/mcs/class/Mono.Simd/Mono.Simd/VectorOperations.cs index 2848cbc3816..0886b075a88 100644 --- a/mcs/class/Mono.Simd/Mono.Simd/VectorOperations.cs +++ b/mcs/class/Mono.Simd/Mono.Simd/VectorOperations.cs @@ -61,6 +61,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector4ui ArithmeticRightShift (this Vector4ui v1, int amount) { Vector4ui res = new Vector4ui (); @@ -72,6 +73,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector8us ArithmeticRightShift (this Vector8us va, int amount) { Vector8us res = new Vector8us (); @@ -110,6 +112,25 @@ namespace Mono.Simd return res; } + [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] + public static unsafe int ExtractByteMask (this Vector16sb va) { + int res = 0; + sbyte *a = (sbyte*)&va; + for (int i = 0; i < 16; ++i) + res |= (*a++ & 0x80) >> 7 << i; + return res; + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe int ExtractByteMask (this Vector16b va) { + int res = 0; + byte *a = (byte*)&va; + for (int i = 0; i < 16; ++i) + res |= (*a++ & 0x80) >> 7 << i; + return res; + } + /* ==== Math operations ==== */ @@ -125,6 +146,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector8us AddWithSaturation (this Vector8us va, Vector8us vb) { Vector8us res = new Vector8us (); ushort *a = &va.v0; @@ -135,6 +157,29 @@ namespace Mono.Simd return res; } + [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] + public static unsafe Vector16sb AddWithSaturation (this Vector16sb va, Vector16sb vb) { + Vector16sb res = new Vector16sb (); + sbyte *a = &va.v0; + sbyte *b = &vb.v0; + sbyte *c = &res.v0; + for (int i = 0; i < 16; ++i) + *c++ = (sbyte) System.Math.Max (System.Math.Min (*a++ + *b++, sbyte.MaxValue), sbyte.MinValue); + return res; + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector16b AddWithSaturation (this Vector16b va, Vector16b vb) { + Vector16b res = new Vector16b (); + byte *a = &va.v0; + byte *b = &vb.v0; + byte *c = &res.v0; + for (int i = 0; i < 16; ++i) + *c++ = (byte) System.Math.Min (*a++ + *b++, byte.MaxValue); + return res; + } + [Acceleration (AccelMode.SSE2)] public static unsafe Vector8s SubtractWithSaturation (this Vector8s va, Vector8s vb) { Vector8s res = new Vector8s (); @@ -147,6 +192,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector8us SubtractWithSaturation (this Vector8us va, Vector8us vb) { Vector8us res = new Vector8us (); ushort *a = &va.v0; @@ -157,6 +203,29 @@ namespace Mono.Simd return res; } + [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] + public static unsafe Vector16sb SubtractWithSaturation (this Vector16sb va, Vector16sb vb) { + Vector16sb res = new Vector16sb (); + sbyte *a = &va.v0; + sbyte *b = &vb.v0; + sbyte *c = &res.v0; + for (int i = 0; i < 16; ++i) + *c++ = (sbyte) System.Math.Max (System.Math.Min (*a++ - *b++, sbyte.MaxValue), sbyte.MinValue); + return res; + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector16b SubtractWithSaturation (this Vector16b va, Vector16b vb) { + Vector16b res = new Vector16b (); + byte *a = &va.v0; + byte *b = &vb.v0; + byte *c = &res.v0; + for (int i = 0; i < 16; ++i) + *c++ = (byte) System.Math.Max (*a++ - *b++, 0); + return res; + } + [Acceleration (AccelMode.SSE2)] public static unsafe Vector8s MultiplyStoreHigh (this Vector8s va, Vector8s vb) { Vector8s res = new Vector8s (); @@ -169,6 +238,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector8us MultiplyStoreHigh (this Vector8us va, Vector8us vb) { Vector8us res = new Vector8us (); ushort *a = &va.v0; @@ -179,6 +249,26 @@ namespace Mono.Simd return res; } + [CLSCompliant(false)] + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector8us SumOfAbsoluteDifferences (this Vector16b va, Vector16sb vb) { + Vector8us res = new Vector8us (); + byte *a = &va.v0; + sbyte *b = (sbyte*)&vb; + + int tmp = 0; + for (int i = 0; i < 8; ++i) + tmp += System.Math.Abs ((int)*a++ - (int)*b++); + res.V0 = (ushort)tmp; + + tmp = 0; + for (int i = 0; i < 8; ++i) + tmp += System.Math.Abs ((int)*a++ - (int)*b++); + res.V4 = (ushort)tmp; + + return res; + } + [Acceleration (AccelMode.SSE1)] public static Vector4f Sqrt (this Vector4f v1) { @@ -204,6 +294,14 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + public static Vector2d Sqrt (this Vector2d v1) + { + return new Vector2d (System.Math.Sqrt (v1.x), + System.Math.Sqrt (v1.y)); + } + + [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector8us Average (this Vector8us va, Vector8us vb) { Vector8us res = new Vector8us (); ushort *a = &va.v0; @@ -214,6 +312,17 @@ namespace Mono.Simd return res; } + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector16b Average (this Vector16b va, Vector16b vb) { + Vector16b res = new Vector16b (); + byte *a = &va.v0; + byte *b = &vb.v0; + byte *c = &res.v0; + for (int i = 0; i < 16; ++i) + *c++ = (byte) ((*a++ + *b++ + 1) >> 1); + return res; + } + [Acceleration (AccelMode.SSE1)] public static Vector4f Max (this Vector4f v1, Vector4f v2) { @@ -237,6 +346,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE41)] + [CLSCompliant (false)] public static Vector4ui Max (this Vector4ui v1, Vector4ui v2) { return new Vector4ui (System.Math.Max (v1.x, v2.x), System.Math.Max (v1.y, v2.y), System.Math.Max (v1.z, v2.z), System.Math.Max (v1.w, v2.w)); @@ -254,6 +364,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE41)] + [CLSCompliant (false)] public static unsafe Vector8us Max (this Vector8us va, Vector8us vb) { Vector8us res = new Vector8us (); ushort *a = &va.v0; @@ -273,6 +384,29 @@ namespace Mono.Simd System.Math.Min (v1.w, v2.w)); } + [Acceleration (AccelMode.SSE41)] + [CLSCompliant (false)] + public static unsafe Vector16sb Max (this Vector16sb va, Vector16sb vb) { + Vector16sb res = new Vector16sb (); + sbyte *a = &va.v0; + sbyte *b = &vb.v0; + sbyte *c = &res.v0; + for (int i = 0; i < 16; ++i) + *c++ = (sbyte) System.Math.Max (*a++, *b++); + return res; + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector16b Max (this Vector16b va, Vector16b vb) { + Vector16b res = new Vector16b (); + byte *a = &va.v0; + byte *b = &vb.v0; + byte *c = &res.v0; + for (int i = 0; i < 16; ++i) + *c++ = (byte) System.Math.Max(*a++, *b++); + return res; + } + [Acceleration (AccelMode.SSE2)] public static Vector2d Min (this Vector2d v1, Vector2d v2) { @@ -287,6 +421,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE41)] + [CLSCompliant (false)] public static Vector4ui Min (this Vector4ui v1, Vector4ui v2) { return new Vector4ui (System.Math.Min (v1.x, v2.x), System.Math.Min (v1.y, v2.y), System.Math.Min (v1.z, v2.z), System.Math.Min (v1.w, v2.w)); @@ -304,6 +439,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE41)] + [CLSCompliant (false)] public static unsafe Vector8us Min (this Vector8us va, Vector8us vb) { Vector8us res = new Vector8us (); ushort *a = &va.v0; @@ -314,6 +450,30 @@ namespace Mono.Simd return res; } + [Acceleration (AccelMode.SSE41)] + [CLSCompliant (false)] + public static unsafe Vector16sb Min (this Vector16sb va, Vector16sb vb) { + Vector16sb res = new Vector16sb (); + sbyte *a = &va.v0; + sbyte *b = &vb.v0; + sbyte *c = &res.v0; + for (int i = 0; i < 16; ++i) + *c++ = (sbyte) System.Math.Min(*a++, *b++); + return res; + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector16b Min (this Vector16b va, Vector16b vb) { + Vector16b res = new Vector16b (); + byte *a = &va.v0; + byte *b = &vb.v0; + byte *c = &res.v0; + for (int i = 0; i < 16; ++i) + *c++ = (byte) System.Math.Min(*a++, *b++); + return res; + } + + /* ==== Horizontal operations ==== */ [Acceleration (AccelMode.SSE3)] @@ -384,6 +544,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE41)] + [CLSCompliant (false)] public static Vector2ul CompareEqual (this Vector2ul v1, Vector2ul v2) { return new Vector2ul ((ulong)(v1.x == v2.x ? -1 : 0), (ulong)(v1.y == v2.y ? -1 : 0)); @@ -396,6 +557,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static Vector4ui CompareEqual (this Vector4ui v1, Vector4ui v2) { return new Vector4ui ((uint)(v1.x == v2.x ? -1 : 0), (uint)(v1.y == v2.y ? -1 : 0), (uint)(v1.z == v2.z ? -1 : 0), (uint)(v1.w == v2.w ? -1 : 0)); @@ -413,6 +575,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector8us CompareEqual (this Vector8us va, Vector8us vb) { Vector8us res = new Vector8us (); ushort *a = &va.v0; @@ -423,6 +586,29 @@ namespace Mono.Simd return res; } + [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] + public static unsafe Vector16sb CompareEqual (this Vector16sb va, Vector16sb vb) { + Vector16sb res = new Vector16sb (); + sbyte *a = &va.v0; + sbyte *b = &vb.v0; + sbyte *c = &res.v0; + for (int i = 0; i < 16; ++i) + *c++ = (sbyte) (*a++ == *b++ ? -1 : 0); + return res; + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector16b CompareEqual (this Vector16b va, Vector16b vb) { + Vector16b res = new Vector16b (); + byte *a = &va.v0; + byte *b = &vb.v0; + byte *c = &res.v0; + for (int i = 0; i < 16; ++i) + *c++ = (byte) (*a++ == *b++ ? -1 : 0); + return res; + } + /*Same as a < b. */ [Acceleration (AccelMode.SSE1)] public unsafe static Vector4f CompareLessThan (this Vector4f v1, Vector4f v2) @@ -494,6 +680,18 @@ namespace Mono.Simd return res; } + [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] + public static unsafe Vector16sb CompareGreaterThan (this Vector16sb va, Vector16sb vb) { + Vector16sb res = new Vector16sb (); + sbyte *a = &va.v0; + sbyte *b = &vb.v0; + sbyte *c = &res.v0; + for (int i = 0; i < 16; ++i) + *c++ = (sbyte) (*a++ > *b++ ? -1 : 0); + return res; + } + /*Same float.IsNaN (a) || float.IsNaN (b). */ [Acceleration (AccelMode.SSE1)] public unsafe static Vector4f CompareUnordered (this Vector4f v1, Vector4f v2) @@ -666,6 +864,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static Vector2ul UnpackLow (this Vector2ul v1, Vector2ul v2) { return new Vector2ul (v1.x, v2.x); @@ -678,6 +877,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static Vector4ui UnpackLow (this Vector4ui v1, Vector4ui v2) { return new Vector4ui (v1.x, v2.x, v1.y, v2.y); @@ -690,11 +890,25 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector8us UnpackLow (this Vector8us va, Vector8us vb) { return new Vector8us (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3); } - + + [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] + public static unsafe Vector16sb UnpackLow (this Vector16sb va, Vector16sb vb) + { + return new Vector16sb (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3, va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector16b UnpackLow (this Vector16b va, Vector16b vb) + { + return new Vector16b (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3, va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7); + } + [Acceleration (AccelMode.SSE2)] public static Vector2l UnpackHigh (this Vector2l v1, Vector2l v2) { @@ -702,6 +916,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static Vector2ul UnpackHigh (this Vector2ul v1, Vector2ul v2) { return new Vector2ul (v1.y, v2.y); @@ -714,6 +929,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static Vector4ui UnpackHigh (this Vector4ui v1, Vector4ui v2) { return new Vector4ui (v1.z, v2.z, v1.w, v2.w); @@ -726,11 +942,76 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector8us UnpackHigh (this Vector8us va, Vector8us vb) { return new Vector8us (va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7); } + [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] + public static unsafe Vector16sb UnpackHigh (this Vector16sb va, Vector16sb vb) + { + return new Vector16sb (va.v8, vb.v8, va.v9, vb.v9, va.v10, vb.v10, va.v11, vb.v11, va.v12, vb.v12, va.v13, vb.v13, va.v14, vb.v14, va.v15, vb.v15); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector16b UnpackHigh (this Vector16b va, Vector16b vb) + { + return new Vector16b (va.v8, vb.v8, va.v9, vb.v9, va.v10, vb.v10, va.v11, vb.v11, va.v12, vb.v12, va.v13, vb.v13, va.v14, vb.v14, va.v15, vb.v15); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector4f Shuffle (this Vector4f v1, Vector4f v2, ShuffleSel sel) + { + float *p1 = (float*)&v1; + float *p2 = (float*)&v2; + int idx = (int)sel; + return new Vector4f (*(p1 + ((idx >> 0) & 0x3)), *(p1 + ((idx >> 2) & 0x3)), *(p2 + ((idx >> 4) & 0x3)), *(p2 + ((idx >> 6) & 0x3))); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector4i Shuffle (this Vector4i v1, Vector4i v2, ShuffleSel sel) + { + int *p1 = (int*)&v1; + int *p2 = (int*)&v2; + int idx = (int)sel; + return new Vector4i (*(p1 + ((idx >> 0) & 0x3)), *(p1 + ((idx >> 2) & 0x3)), *(p2 + ((idx >> 4) & 0x3)), *(p2 + ((idx >> 6) & 0x3))); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector4ui Shuffle (this Vector4ui v1, Vector4ui v2, ShuffleSel sel) + { + uint *p1 = (uint*)&v1; + uint *p2 = (uint*)&v2; + int idx = (int)sel; + return new Vector4ui (*(p1 + ((idx >> 0) & 0x3)), *(p1 + ((idx >> 2) & 0x3)), *(p2 + ((idx >> 4) & 0x3)), *(p2 + ((idx >> 6) & 0x3))); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector2d Shuffle (this Vector2d v1, Vector2d v2, int sel) + { + double *p1 = (double*)&v1; + double *p2 = (double*)&v2; + return new Vector2d (*(p1 + ((sel >> 0) & 0x1)), *(p2 + ((sel >> 1) & 0x1))); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector2l Shuffle (this Vector2l v1, Vector2l v2, int sel) + { + long *p1 = (long*)&v1; + long *p2 = (long*)&v2; + return new Vector2l (*(p1 + ((sel >> 0) & 0x1)), *(p2 + ((sel >> 1) & 0x1))); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector2ul Shuffle (this Vector2ul v1, Vector2ul v2, int sel) + { + ulong *p1 = (ulong*)&v1; + ulong *p2 = (ulong*)&v2; + return new Vector2ul (*(p1 + ((sel >> 0) & 0x1)), *(p2 + ((sel >> 1) & 0x1))); + } + [Acceleration (AccelMode.SSE2)] public static unsafe Vector4f Shuffle (this Vector4f v1, ShuffleSel sel) { @@ -748,6 +1029,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector4ui Shuffle (this Vector4ui v1, ShuffleSel sel) { uint *ptr = (uint*)&v1; @@ -764,6 +1046,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector8us ShuffleHigh (this Vector8us va, ShuffleSel sel) { ushort *ptr = ((ushort*)&va) + 4; @@ -780,6 +1063,7 @@ namespace Mono.Simd } [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector8us ShuffleLow (this Vector8us va, ShuffleSel sel) { ushort *ptr = ((ushort*)&va); @@ -844,6 +1128,7 @@ namespace Mono.Simd /* This function performs a packusdw, which treats the source as a signed value */ [Acceleration (AccelMode.SSE41)] + [CLSCompliant (false)] public static unsafe Vector8us SignedPackWithUnsignedSaturation (this Vector4ui va, Vector4ui vb) { Vector8us res = new Vector8us (); int *a = (int*)&va; @@ -858,6 +1143,7 @@ namespace Mono.Simd /*This function performs a packuswb, which treats the source as a signed value */ [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector16b SignedPackWithUnsignedSaturation (this Vector8us va, Vector8us vb) { Vector16b res = new Vector16b (); short *a = (short*)&va; @@ -872,6 +1158,7 @@ namespace Mono.Simd /* This function performs a packssdw, which treats the source as a signed value*/ [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector8s SignedPackWithSignedSaturation (this Vector4ui va, Vector4ui vb) { Vector8s res = new Vector8s (); int *a = (int*)&va; @@ -886,6 +1173,7 @@ namespace Mono.Simd /*This function performs a packsswb, which treats the source as a signed value */ [Acceleration (AccelMode.SSE2)] + [CLSCompliant (false)] public static unsafe Vector16sb SignedPackWithSignedSaturation (this Vector8us va, Vector8us vb) { Vector16sb res = new Vector16sb (); short *a = (short*)&va; @@ -897,5 +1185,45 @@ namespace Mono.Simd *c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*b++, sbyte.MaxValue), sbyte.MinValue); return res; } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector4f ConvertToFloat (this Vector4i v0) { + return new Vector4f (v0.X, v0.Y, v0.Z, v0.W); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector2d ConvertToDouble (this Vector4i v0) { + return new Vector2d (v0.X, v0.Y); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector4i ConvertToInt (this Vector2d v0) { + return new Vector4i ((int)System.Math.Round (v0.X), (int)System.Math.Round (v0.Y), 0, 0); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector4i ConvertToIntTruncated (this Vector2d v0) { + return new Vector4i ((int) (v0.X), (int) (v0.Y), 0, 0); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector4f ConvertToFloat (this Vector2d v0) { + return new Vector4f ((float)v0.X, (float)v0.Y, 0, 0); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector4i ConvertToInt (this Vector4f v0) { + return new Vector4i ((int)System.Math.Round (v0.X), (int)System.Math.Round (v0.Y), (int)System.Math.Round (v0.Z), (int)System.Math.Round (v0.W)); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector4i ConvertToIntTruncated (this Vector4f v0) { + return new Vector4i ((int)v0.X, (int)v0.Y, (int)v0.Z, (int)v0.W); + } + + [Acceleration (AccelMode.SSE2)] + public static unsafe Vector2d ConvertToDouble (this Vector4f v0) { + return new Vector2d (v0.X, v0.Y); + } } -} \ No newline at end of file +}