4 // Rodrigo Kumpera (rkumpera@novell.com)
6 // (C) 2008 Novell, Inc. (http://www.novell.com)
8 // Permission is hereby granted, free of charge, to any person obtaining
9 // a copy of this software and associated documentation files (the
10 // "Software"), to deal in the Software without restriction, including
11 // without limitation the rights to use, copy, modify, merge, publish,
12 // distribute, sublicense, and/or sell copies of the Software, and to
13 // permit persons to whom the Software is furnished to do so, subject to
14 // the following conditions:
16 // The above copyright notice and this permission notice shall be
17 // included in all copies or substantial portions of the Software.
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 using System.Runtime.InteropServices;
/// <summary>
/// Vector of eight signed 16-bit lanes. The StructLayout attribute requests
/// sequential field layout and fixes the total size at 16 bytes.
/// </summary>
32 [StructLayout(LayoutKind.Sequential, Pack = 0, Size = 16)]
33 public struct Vector8s
// Lane storage, v0 first. Members of this type take the address of v0 and index
// past it to reach the other lanes, so the declaration order here matters.
35 private short v0, v1, v2, v3, v4, v5, v6, v7;
/// <summary>
/// Constructor taking one value per lane, named v0 through v7.
/// </summary>
// NOTE(review): the constructor body is not part of this listing; presumably each
// argument is stored in the field of the same name - confirm.
36 public Vector8s (short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
/// <summary>Gets or sets lane 0 (field v0).</summary>
public short V0 {
    get { return this.v0; }
    set { this.v0 = value; }
}

/// <summary>Gets or sets lane 1 (field v1).</summary>
public short V1 {
    get { return this.v1; }
    set { this.v1 = value; }
}

/// <summary>Gets or sets lane 2 (field v2).</summary>
public short V2 {
    get { return this.v2; }
    set { this.v2 = value; }
}

/// <summary>Gets or sets lane 3 (field v3).</summary>
public short V3 {
    get { return this.v3; }
    set { this.v3 = value; }
}

/// <summary>Gets or sets lane 4 (field v4).</summary>
public short V4 {
    get { return this.v4; }
    set { this.v4 = value; }
}

/// <summary>Gets or sets lane 5 (field v5).</summary>
public short V5 {
    get { return this.v5; }
    set { this.v5 = value; }
}

/// <summary>Gets or sets lane 6 (field v6).</summary>
public short V6 {
    get { return this.v6; }
    set { this.v6 = value; }
}

/// <summary>Gets or sets lane 7 (field v7).</summary>
public short V7 {
    get { return this.v7; }
    set { this.v7 = value; }
}
/// <summary>
/// Indexed access to a single lane. Indices outside 0..7 raise
/// ArgumentOutOfRangeException. The IndexerName attribute names the indexer
/// "Component".
/// </summary>
57 [System.Runtime.CompilerServices.IndexerName ("Component")]
58 public unsafe short this [int index]
// Getter fragment. (index | 0x7) equals 0x7 only when no bit above the low three
// is set, so a single test rejects both negative indices and indices above 7.
61 if ((index | 0x7) != 0x7) //index < 0 || index > 7
62 throw new ArgumentOutOfRangeException ("index");
// NOTE(review): the statement inside this fixed block is absent from the listing;
// presumably it returns *(v + index) - confirm.
63 fixed (short *v = &v0) {
// Setter fragment: same range test, then the value is written index lanes past v0.
68 if ( (index | 0x7) != 0x7) //index < 0 || index > 7
69 throw new ArgumentOutOfRangeException ("index");
70 fixed (short *v = &v0) {
71 * (v + index) = value;
/// <summary>
/// Lane-wise addition over eight lanes. Each sum is cast back to short, which in
/// an unchecked context keeps only the low 16 bits (no saturation).
/// </summary>
76 [Acceleration (AccelMode.SSE2)]
77 public static unsafe Vector8s operator + (Vector8s va, Vector8s vb)
79 Vector8s res = new Vector8s ();
// NOTE(review): the declarations of a, b and c are absent from this listing;
// presumably short pointers to the first lane of va, vb and res - confirm.
83 for (int i = 0; i < 8; ++i)
84 *c++ = (short)(*a++ + *b++);
/// <summary>
/// Lane-wise subtraction over eight lanes. Each difference is cast back to short,
/// which in an unchecked context keeps only the low 16 bits (no saturation).
/// </summary>
88 [Acceleration (AccelMode.SSE2)]
89 public static unsafe Vector8s operator - (Vector8s va, Vector8s vb)
91 Vector8s res = new Vector8s ();
// NOTE(review): the declarations of a, b and c are absent from this listing;
// presumably short pointers to the first lane of va, vb and res - confirm.
95 for (int i = 0; i < 8; ++i)
96 *c++ = (short)(*a++ - *b++);
/// <summary>
/// Lane-wise multiplication over eight lanes. Each product is cast back to short,
/// which in an unchecked context keeps only its low 16 bits.
/// </summary>
100 [Acceleration (AccelMode.SSE2)]
101 public static unsafe Vector8s operator * (Vector8s va, Vector8s vb)
103 Vector8s res = new Vector8s ();
// NOTE(review): the declarations of a, b and c are absent from this listing;
// presumably short pointers to the first lane of va, vb and res - confirm.
107 for (int i = 0; i < 8; ++i)
108 *c++ = (short)(*a++ * (*b++));
/// <summary>
/// Shifts each of the eight lanes right by amount bits and casts the result back
/// to short.
/// </summary>
112 [Acceleration (AccelMode.SSE2)]
113 public static unsafe Vector8s operator >> (Vector8s va, int amount)
115 Vector8s res = new Vector8s ();
// NOTE(review): the declarations of a and b are absent from this listing. Assuming
// a is a short pointer into va, each lane is sign-extended to int before the shift,
// which makes this an arithmetic shift - confirm.
// NOTE(review): C# uses only the low five bits of the count when shifting an int,
// so counts outside 0..31 wrap around; confirm that is the intended behaviour.
118 for (int i = 0; i < 8; ++i)
119 *b++ = (short)(*a++ >> amount);
/// <summary>
/// Shifts each of the eight lanes left by amount bits; the cast back to short
/// drops every bit shifted above bit 15.
/// </summary>
123 [Acceleration (AccelMode.SSE2)]
124 public static unsafe Vector8s operator << (Vector8s va, int amount)
126 Vector8s res = new Vector8s ();
// NOTE(review): the declarations of a and b are absent from this listing;
// presumably short pointers to the first lane of va and res - confirm.
// NOTE(review): C# uses only the low five bits of the count when shifting an int,
// so counts outside 0..31 wrap around; confirm that is the intended behaviour.
129 for (int i = 0; i < 8; ++i)
130 *b++ = (short)(*a++ << amount);
/// <summary>
/// Bitwise AND of two vectors. The 16 bytes of each operand are combined as two
/// 64-bit words rather than lane by lane.
/// </summary>
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector8s operator & (Vector8s va, Vector8s vb)
{
    Vector8s result = new Vector8s ();
    ulong *lhs = (ulong*) &va.v0;
    ulong *rhs = (ulong*) &vb.v0;
    ulong *dst = (ulong*) &result.v0;
    // First 64-bit word, then the second one.
    dst [0] = lhs [0] & rhs [0];
    dst [1] = lhs [1] & rhs [1];
    return result;
}
/// <summary>
/// Bitwise OR of two vectors. The 16 bytes of each operand are combined as two
/// 64-bit words rather than lane by lane.
/// </summary>
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector8s operator | (Vector8s va, Vector8s vb)
{
    Vector8s result = new Vector8s ();
    ulong *lhs = (ulong*) &va.v0;
    ulong *rhs = (ulong*) &vb.v0;
    ulong *dst = (ulong*) &result.v0;
    // First 64-bit word, then the second one.
    dst [0] = lhs [0] | rhs [0];
    dst [1] = lhs [1] | rhs [1];
    return result;
}
/// <summary>
/// Bitwise exclusive OR of two vectors. The 16 bytes of each operand are combined
/// as two 64-bit words rather than lane by lane.
/// </summary>
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector8s operator ^ (Vector8s va, Vector8s vb)
{
    Vector8s result = new Vector8s ();
    ulong *lhs = (ulong*) &va.v0;
    ulong *rhs = (ulong*) &vb.v0;
    ulong *dst = (ulong*) &result.v0;
    // First 64-bit word, then the second one.
    dst [0] = lhs [0] ^ rhs [0];
    dst [1] = lhs [1] ^ rhs [1];
    return result;
}
/// <summary>
/// Builds a vector from lanes 0 to 3 of both operands, handed to the constructor
/// in interleaved order: va0, vb0, va1, vb1, va2, vb2, va3, vb3.
/// </summary>
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector8s UnpackLow (Vector8s va, Vector8s vb)
{
    Vector8s interleaved = new Vector8s (
        va.v0, vb.v0,
        va.v1, vb.v1,
        va.v2, vb.v2,
        va.v3, vb.v3);
    return interleaved;
}
/// <summary>
/// Builds a vector from lanes 4 to 7 of both operands, handed to the constructor
/// in interleaved order: va4, vb4, va5, vb5, va6, vb6, va7, vb7.
/// </summary>
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector8s UnpackHigh (Vector8s va, Vector8s vb)
{
    Vector8s interleaved = new Vector8s (
        va.v4, vb.v4,
        va.v5, vb.v5,
        va.v6, vb.v6,
        va.v7, vb.v7);
    return interleaved;
}
/// <summary>
/// Shifts each of the eight lanes right by amount bits, filling with zeros. Every
/// lane is cast to ushort before the shift, so no sign bits are shifted in.
/// </summary>
182 [Acceleration (AccelMode.SSE2)]
183 public static unsafe Vector8s LogicalRightShift (Vector8s va, int amount)
185 Vector8s res = new Vector8s ();
// NOTE(review): the declarations of a and b are absent from this listing;
// presumably short pointers to the first lane of va and res - confirm.
// NOTE(review): C# uses only the low five bits of the count when shifting an int,
// so counts outside 0..31 wrap around; confirm that is the intended behaviour.
188 for (int i = 0; i < 8; ++i)
189 *b++ = (short)((ushort)(*a++) >> amount);
/// <summary>
/// Lane-wise addition with saturation: each sum is limited to at most
/// short.MaxValue and at least short.MinValue before being cast to short.
/// </summary>
193 [Acceleration (AccelMode.SSE2)]
194 public static unsafe Vector8s AddWithSaturation (Vector8s va, Vector8s vb) {
195 Vector8s res = new Vector8s ();
// NOTE(review): the declarations of a, b and c are absent from this listing;
// presumably short pointers to the first lane of va, vb and res - confirm.
// Min applies the upper bound first, then Max applies the lower bound.
199 for (int i = 0; i < 8; ++i)
200 *c++ = (short) System.Math.Max (System.Math.Min (*a++ + *b++, short.MaxValue), short.MinValue);
/// <summary>
/// Lane-wise subtraction with saturation: each difference is limited to at most
/// short.MaxValue and at least short.MinValue before being cast to short.
/// </summary>
204 [Acceleration (AccelMode.SSE2)]
205 public static unsafe Vector8s SubtractWithSaturation (Vector8s va, Vector8s vb) {
206 Vector8s res = new Vector8s ();
// NOTE(review): the declarations of a, b and c are absent from this listing;
// presumably short pointers to the first lane of va, vb and res - confirm.
// Min applies the upper bound first, then Max applies the lower bound. The second
// semicolon on the statement below is an empty statement and has no effect.
210 for (int i = 0; i < 8; ++i)
211 *c++ = (short) System.Math.Max (System.Math.Min (*a++ - *b++, short.MaxValue), short.MinValue); ;
/// <summary>
/// Lane-wise maximum over eight lanes, computed with System.Math.Max.
/// </summary>
215 [Acceleration (AccelMode.SSE2)]
216 public static unsafe Vector8s Max (Vector8s va, Vector8s vb) {
217 Vector8s res = new Vector8s ();
// NOTE(review): the declarations of a, b and c are absent from this listing;
// presumably short pointers to the first lane of va, vb and res - confirm.
221 for (int i = 0; i < 8; ++i)
222 *c++ = (short) System.Math.Max (*a++, *b++);
/// <summary>
/// Lane-wise minimum over eight lanes, computed with System.Math.Min.
/// </summary>
226 [Acceleration (AccelMode.SSE2)]
227 public static unsafe Vector8s Min (Vector8s va, Vector8s vb) {
228 Vector8s res = new Vector8s ();
// NOTE(review): the declarations of a, b and c are absent from this listing;
// presumably short pointers to the first lane of va, vb and res - confirm.
232 for (int i = 0; i < 8; ++i)
233 *c++ = (short) System.Math.Min (*a++, *b++);
/// <summary>
/// Collects the top bit of each of the vector's 16 bytes into an integer mask:
/// bit i of the mask is the 0x80 bit of byte i.
/// </summary>
237 [Acceleration (AccelMode.SSE2)]
238 public static unsafe int ExtractByteMask (Vector8s va) {
// NOTE(review): the declaration of res and the return statement are absent from
// this listing; presumably res starts at 0 and is returned - confirm.
// The vector is walked one byte at a time.
240 byte *a = (byte*)&va;
// Isolate the byte's top bit, move it down to bit 0, then up to bit position i.
241 for (int i = 0; i < 16; ++i)
242 res |= (*a++ & 0x80) >> 7 << i;
/// <summary>
/// Rearranges lanes 4 to 7 and passes lanes 0 to 3 through unchanged. Each of the
/// four upper result lanes is picked from the upper four input lanes by one
/// 2-bit field of the selector value (bits 0-1, 2-3, 4-5 and 6-7 in turn).
/// </summary>
246 [Acceleration (AccelMode.SSE2)]
247 public static unsafe Vector8s ShuffleHigh (Vector8s va, ShuffleSel sel)
// ptr addresses lane 4, so the 2-bit offsets below select among lanes 4 to 7.
249 short *ptr = ((short*)&va) + 4;
// NOTE(review): the declaration of idx is absent from this listing; presumably it
// is sel converted to int - confirm.
251 return new Vector8s (va.v0, va.v1, va.v2, va.v3, *(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)));
/// <summary>
/// Rearranges lanes 0 to 3 and passes lanes 4 to 7 through unchanged. Each of the
/// four lower result lanes is picked from the lower four input lanes by one
/// 2-bit field of the selector value (bits 0-1, 2-3, 4-5 and 6-7 in turn).
/// </summary>
254 [Acceleration (AccelMode.SSE2)]
255 public static unsafe Vector8s ShuffleLow (Vector8s va, ShuffleSel sel)
// ptr addresses lane 0, so the 2-bit offsets below select among lanes 0 to 3.
257 short *ptr = ((short*)&va);
// NOTE(review): the declaration of idx is absent from this listing; presumably it
// is sel converted to int - confirm.
259 return new Vector8s (*(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)), va.v4, va.v5, va.v6, va.v7);
/// <summary>
/// Lane-wise equality test over eight lanes. A result lane is -1 (all bits set)
/// where the operand lanes are equal and 0 otherwise.
/// </summary>
262 [Acceleration (AccelMode.SSE2)]
263 public static unsafe Vector8s CompareEqual (Vector8s va, Vector8s vb) {
264 Vector8s res = new Vector8s ();
// NOTE(review): the declarations of a, b and c are absent from this listing;
// presumably short pointers to the first lane of va, vb and res - confirm.
268 for (int i = 0; i < 8; ++i)
269 *c++ = (short) (*a++ == *b++ ? -1 : 0);
/// <summary>
/// Lane-wise greater-than test over eight lanes. A result lane is -1 (all bits
/// set) where the lane of va exceeds the lane of vb and 0 otherwise.
/// </summary>
273 [Acceleration (AccelMode.SSE2)]
274 public static unsafe Vector8s CompareGreaterThan (Vector8s va, Vector8s vb) {
275 Vector8s res = new Vector8s ();
// NOTE(review): the declarations of a, b and c are absent from this listing;
// presumably short pointers to the first lane of va, vb and res, which would make
// the comparison a signed one - confirm.
279 for (int i = 0; i < 8; ++i)
280 *c++ = (short) (*a++ > *b++ ? -1 : 0);
/// <summary>
/// Lane-wise multiplication that keeps the high half of each product: the lanes
/// are multiplied as int and the product is shifted right by 16 before the cast
/// to short.
/// </summary>
284 [Acceleration (AccelMode.SSE2)]
285 public static unsafe Vector8s MultiplyStoreHigh (Vector8s va, Vector8s vb) {
286 Vector8s res = new Vector8s ();
// NOTE(review): the declarations of a, b and c are absent from this listing;
// presumably short pointers to the first lane of va, vb and res - confirm.
// Multiplication binds tighter than the shift, so the whole product is shifted.
290 for (int i = 0; i < 8; ++i)
291 *c++ = (short)((int)*a++ * (int)*b++ >> 16);
/// <summary>
/// Narrows sixteen 16-bit lanes to bytes: the eight lanes of va fill result
/// bytes 0 to 7 and the eight lanes of vb fill bytes 8 to 15. Lanes below 0
/// become 0 and lanes above byte.MaxValue become byte.MaxValue.
/// </summary>
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector16b PackWithUnsignedSaturation (Vector8s va, Vector8s vb) {
    Vector16b packed = new Vector16b ();
    short *first = (short*)&va;
    short *second = (short*)&vb;
    byte *dst = (byte*)&packed;
    for (int lane = 0; lane < 8; ++lane) {
        int fromA = first [lane];
        int fromB = second [lane];
        // Clamp to the byte range before narrowing.
        dst [lane] = (byte)(fromA < 0 ? 0 : (fromA > byte.MaxValue ? byte.MaxValue : fromA));
        dst [lane + 8] = (byte)(fromB < 0 ? 0 : (fromB > byte.MaxValue ? byte.MaxValue : fromB));
    }
    return packed;
}
/// <summary>
/// Narrows sixteen 16-bit lanes to signed bytes: the eight lanes of va fill
/// result bytes 0 to 7 and the eight lanes of vb fill bytes 8 to 15. Lanes above
/// sbyte.MaxValue become sbyte.MaxValue and lanes below sbyte.MinValue become
/// sbyte.MinValue.
/// </summary>
[CLSCompliant(false)]
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector16sb PackWithSignedSaturation (Vector8s va, Vector8s vb) {
    Vector16sb packed = new Vector16sb ();
    short *first = (short*)&va;
    short *second = (short*)&vb;
    sbyte *dst = (sbyte*)&packed;
    for (int lane = 0; lane < 8; ++lane) {
        int fromA = first [lane];
        int fromB = second [lane];
        // Clamp to the sbyte range before narrowing.
        dst [lane] = (sbyte)(fromA > sbyte.MaxValue ? sbyte.MaxValue : (fromA < sbyte.MinValue ? sbyte.MinValue : fromA));
        dst [lane + 8] = (sbyte)(fromB > sbyte.MaxValue ? sbyte.MaxValue : (fromB < sbyte.MinValue ? sbyte.MinValue : fromB));
    }
    return packed;
}
/// <summary>
/// Explicit conversion to Vector2d. The visible code takes the address of the
/// argument and casts it to a Vector2d pointer; no per-lane arithmetic appears.
/// </summary>
322 [Acceleration (AccelMode.SSE1)]
323 public static unsafe explicit operator Vector2d (Vector8s v)
// NOTE(review): the statement producing the result is absent from this listing;
// presumably it returns *p, i.e. the same bits viewed as Vector2d - confirm.
325 Vector2d* p = (Vector2d*)&v;
/// <summary>
/// Explicit conversion to Vector4f. The visible code takes the address of the
/// argument and casts it to a Vector4f pointer; no per-lane arithmetic appears.
/// </summary>
329 [Acceleration (AccelMode.SSE1)]
330 public static unsafe explicit operator Vector4f (Vector8s v)
// NOTE(review): the statement producing the result is absent from this listing;
// presumably it returns *p, i.e. the same bits viewed as Vector4f - confirm.
332 Vector4f* p = (Vector4f*)&v;
/// <summary>
/// Explicit conversion to Vector2l. The visible code takes the address of the
/// argument and casts it to a Vector2l pointer; no per-lane arithmetic appears.
/// </summary>
336 [Acceleration (AccelMode.SSE1)]
337 public static unsafe explicit operator Vector2l (Vector8s v)
// NOTE(review): the statement producing the result is absent from this listing;
// presumably it returns *p, i.e. the same bits viewed as Vector2l - confirm.
339 Vector2l* p = (Vector2l*)&v;
/// <summary>
/// Explicit conversion to Vector2ul. The visible code takes the address of the
/// argument and casts it to a Vector2ul pointer; no per-lane arithmetic appears.
/// </summary>
343 [Acceleration (AccelMode.SSE1)]
344 [CLSCompliant(false)]
345 public static unsafe explicit operator Vector2ul (Vector8s v)
// NOTE(review): the statement producing the result is absent from this listing;
// presumably it returns *p, i.e. the same bits viewed as Vector2ul - confirm.
347 Vector2ul* p = (Vector2ul*)&v;
/// <summary>
/// Explicit conversion to Vector4i. The visible code takes the address of the
/// argument and casts it to a Vector4i pointer; no per-lane arithmetic appears.
/// </summary>
351 [Acceleration (AccelMode.SSE1)]
352 public static unsafe explicit operator Vector4i (Vector8s v)
// NOTE(review): the statement producing the result is absent from this listing;
// presumably it returns *p, i.e. the same bits viewed as Vector4i - confirm.
354 Vector4i* p = (Vector4i*)&v;
/// <summary>
/// Explicit conversion to Vector4ui. The visible code takes the address of the
/// argument and casts it to a Vector4ui pointer; no per-lane arithmetic appears.
/// </summary>
358 [Acceleration (AccelMode.SSE1)]
359 [CLSCompliant(false)]
360 public static unsafe explicit operator Vector4ui (Vector8s v)
// NOTE(review): the statement producing the result is absent from this listing;
// presumably it returns *p, i.e. the same bits viewed as Vector4ui - confirm.
362 Vector4ui* p = (Vector4ui*)&v;
/// <summary>
/// Explicit conversion to Vector8us. The visible code takes the address of the
/// argument and casts it to a Vector8us pointer; no per-lane arithmetic appears.
/// </summary>
366 [Acceleration (AccelMode.SSE1)]
367 [CLSCompliant(false)]
368 public static unsafe explicit operator Vector8us (Vector8s v)
// NOTE(review): the statement producing the result is absent from this listing;
// presumably it returns *p, i.e. the same bits viewed as Vector8us - confirm.
370 Vector8us* p = (Vector8us*)&v;
/// <summary>
/// Explicit conversion to Vector16sb. The visible code takes the address of the
/// argument and casts it to a Vector16sb pointer; no per-lane arithmetic appears.
/// </summary>
374 [Acceleration (AccelMode.SSE1)]
375 [CLSCompliant(false)]
376 public static unsafe explicit operator Vector16sb (Vector8s v)
// NOTE(review): the statement producing the result is absent from this listing;
// presumably it returns *p, i.e. the same bits viewed as Vector16sb - confirm.
378 Vector16sb* p = (Vector16sb*)&v;
/// <summary>
/// Explicit conversion to Vector16b. The visible code takes the address of the
/// argument and casts it to a Vector16b pointer; no per-lane arithmetic appears.
/// </summary>
382 [Acceleration (AccelMode.SSE1)]
383 public static unsafe explicit operator Vector16b (Vector8s v)
// NOTE(review): the statement producing the result is absent from this listing;
// presumably it returns *p, i.e. the same bits viewed as Vector16b - confirm.
385 Vector16b* p = (Vector16b*)&v;
// NOTE(review): only the attribute and signature of this member appear in the
// listing. Going by its name it presumably returns the vector read through the
// reference v, with the caller responsible for alignment - confirm against the body.
390 [Acceleration (AccelMode.SSE1)]
391 public static Vector8s LoadAligned (ref Vector8s v)
// NOTE(review): only the attribute and signature of this member appear in the
// listing. Going by its name it presumably writes val to the location referenced
// by res, with the caller responsible for alignment - confirm against the body.
396 [Acceleration (AccelMode.SSE1)]
397 public static void StoreAligned (ref Vector8s res, Vector8s val)
// Pointer overload of LoadAligned; marked non-CLS-compliant.
// NOTE(review): only the attributes and signature appear in the listing.
// Presumably it returns the vector stored at address v - confirm against the body.
402 [CLSCompliant(false)]
403 [Acceleration (AccelMode.SSE1)]
404 public static unsafe Vector8s LoadAligned (Vector8s *v)
// Pointer overload of StoreAligned; marked non-CLS-compliant.
// NOTE(review): only the attributes and signature appear in the listing.
// Presumably it writes val to address res - confirm against the body.
409 [CLSCompliant(false)]
410 [Acceleration (AccelMode.SSE1)]
411 public static unsafe void StoreAligned (Vector8s *res, Vector8s val)
// Takes the vector by reference and returns nothing; marked non-CLS-compliant.
// NOTE(review): the body is absent from the listing. Going by its name this is
// presumably a prefetch hint for the referenced vector - confirm.
416 [Acceleration (AccelMode.SSE1)]
417 [CLSCompliant(false)]
418 public static void PrefetchTemporalAllCacheLevels (ref Vector8s res)
// Takes the vector by reference and returns nothing; marked non-CLS-compliant.
// NOTE(review): the body is absent from the listing. Going by its name this is
// presumably a prefetch hint for the referenced vector - confirm.
422 [Acceleration (AccelMode.SSE1)]
423 [CLSCompliant(false)]
424 public static void PrefetchTemporal1stLevelCache (ref Vector8s res)
// Takes the vector by reference and returns nothing; marked non-CLS-compliant.
// NOTE(review): the body is absent from the listing. Going by its name this is
// presumably a prefetch hint for the referenced vector - confirm.
428 [Acceleration (AccelMode.SSE1)]
429 [CLSCompliant(false)]
430 public static void PrefetchTemporal2ndLevelCache (ref Vector8s res)
// Takes the vector by reference and returns nothing; marked non-CLS-compliant.
// NOTE(review): the body is absent from the listing. Going by its name this is
// presumably a non-temporal prefetch hint for the referenced vector - confirm.
434 [Acceleration (AccelMode.SSE1)]
435 [CLSCompliant(false)]
436 public static void PrefetchNonTemporal (ref Vector8s res)
// Pointer overload: takes the vector's address and returns nothing; marked
// non-CLS-compliant.
// NOTE(review): the body is absent from the listing. Going by its name this is
// presumably a prefetch hint for the vector at res - confirm.
440 [Acceleration (AccelMode.SSE1)]
441 [CLSCompliant(false)]
442 public static unsafe void PrefetchTemporalAllCacheLevels (Vector8s *res)
// Pointer overload: takes the vector's address and returns nothing; marked
// non-CLS-compliant.
// NOTE(review): the body is absent from the listing. Going by its name this is
// presumably a prefetch hint for the vector at res - confirm.
446 [Acceleration (AccelMode.SSE1)]
447 [CLSCompliant(false)]
448 public static unsafe void PrefetchTemporal1stLevelCache (Vector8s *res)
// Pointer overload: takes the vector's address and returns nothing; marked
// non-CLS-compliant.
// NOTE(review): the body is absent from the listing. Going by its name this is
// presumably a prefetch hint for the vector at res - confirm.
452 [Acceleration (AccelMode.SSE1)]
453 [CLSCompliant(false)]
454 public static unsafe void PrefetchTemporal2ndLevelCache (Vector8s *res)
// Pointer overload: takes the vector's address and returns nothing; marked
// non-CLS-compliant.
// NOTE(review): the body is absent from the listing. Going by its name this is
// presumably a non-temporal prefetch hint for the vector at res - confirm.
458 [Acceleration (AccelMode.SSE1)]
459 [CLSCompliant(false)]
460 public static unsafe void PrefetchNonTemporal (Vector8s *res)