4 // Rodrigo Kumpera (rkumpera@novell.com)
6 // (C) 2008 Novell, Inc. (http://www.novell.com)
8 // Permission is hereby granted, free of charge, to any person obtaining
9 // a copy of this software and associated documentation files (the
10 // "Software"), to deal in the Software without restriction, including
11 // without limitation the rights to use, copy, modify, merge, publish,
12 // distribute, sublicense, and/or sell copies of the Software, and to
13 // permit persons to whom the Software is furnished to do so, subject to
14 // the following conditions:
16 // The above copyright notice and this permission notice shall be
17 // included in all copies or substantial portions of the Software.
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 using System.Runtime.InteropServices;
// Vector of eight unsigned 16-bit lanes (v0..v7), laid out sequentially with a
// declared size of 16 bytes.
32 [StructLayout(LayoutKind.Sequential, Pack = 0, Size = 16)]
34 public struct Vector8us
// Backing storage for the eight lanes. Members of this type take the address of v0
// and index from it, so the declaration order of these fields matters.
36 private ushort v0, v1, v2, v3, v4, v5, v6, v7;
// Constructor taking one value per lane.
// NOTE(review): the body is not visible in this excerpt; presumably each argument is
// stored in the field of the same name - confirm.
37 public Vector8us (ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
// Per-lane accessors: V0..V7 read and write the backing fields v0..v7 directly,
// with no validation or conversion.
49 public ushort V0 { get { return v0; } set { v0 = value; } }
50 public ushort V1 { get { return v1; } set { v1 = value; } }
51 public ushort V2 { get { return v2; } set { v2 = value; } }
52 public ushort V3 { get { return v3; } set { v3 = value; } }
53 public ushort V4 { get { return v4; } set { v4 = value; } }
54 public ushort V5 { get { return v5; } set { v5 = value; } }
55 public ushort V6 { get { return v6; } set { v6 = value; } }
56 public ushort V7 { get { return v7; } set { v7 = value; } }
// Indexed access to the lanes. IndexerName gives the indexer the metadata name "Component".
// An index outside 0..7 raises ArgumentOutOfRangeException on both paths below.
58 [System.Runtime.CompilerServices.IndexerName ("Component")]
59 public unsafe ushort this [int index]
// OR-ing with 0x7 yields exactly 0x7 only when no bit above the low three is set,
// so one comparison rejects both negative values and anything above 7.
62 if ((index | 0x7) != 0x7) //index < 0 || index > 7
63 throw new ArgumentOutOfRangeException ("index");
// fixed provides a stable pointer to v0 so the lanes can be addressed relative to it.
// NOTE(review): the statement inside this fixed block is not visible in this excerpt;
// presumably it returns *(v + index) - confirm.
64 fixed (ushort *v = &v0) {
// Write path (it uses `value`): same range check, then store through the pointer.
69 if ( (index | 0x7) != 0x7) //index < 0 || index > 7
70 throw new ArgumentOutOfRangeException ("index");
71 fixed (ushort *v = &v0) {
72 * (v + index) = value;
// Lane-wise addition over eight lanes. Each sum is computed in int and narrowed back
// to ushort by the cast, so in an unchecked context it wraps modulo 65536.
// NOTE(review): the declarations of a, b and c and the return are not visible in this
// excerpt; presumably they point at the lanes of va, vb and res - confirm.
77 [Acceleration (AccelMode.SSE2)]
78 public static unsafe Vector8us operator + (Vector8us va, Vector8us vb)
80 Vector8us res = new Vector8us ();
84 for (int i = 0; i < 8; ++i)
85 *c++ = (ushort)(*a++ + *b++);
// Lane-wise subtraction over eight lanes. Each difference is computed in int and
// narrowed back to ushort by the cast, so in an unchecked context a negative result
// wraps modulo 65536.
// NOTE(review): the declarations of a, b and c and the return are not visible in this
// excerpt; presumably they point at the lanes of va, vb and res - confirm.
89 [Acceleration (AccelMode.SSE2)]
90 public static unsafe Vector8us operator - (Vector8us va, Vector8us vb)
92 Vector8us res = new Vector8us ();
96 for (int i = 0; i < 8; ++i)
97 *c++ = (ushort)(*a++ - *b++);
102 * NOTE: Though pmullw states it does signed multiplication, it works for unsigned numbers
103 * if only the lower part is considered and the flags disregarded.
// Lane-wise multiplication over eight lanes. The product is computed in int and only
// its low 16 bits survive the ushort cast (in an unchecked context).
// NOTE(review): the declarations of a, b and c and the return are not visible in this
// excerpt; presumably they point at the lanes of va, vb and res - confirm.
105 [Acceleration (AccelMode.SSE2)]
106 public static unsafe Vector8us operator * (Vector8us va, Vector8us vb)
108 Vector8us res = new Vector8us ();
112 for (int i = 0; i < 8; ++i)
113 *c++ = (ushort)(*a++ * (*b++));
// Lane-wise right shift of the unsigned lanes by amount, so zeros are shifted in.
// NOTE(review): each lane is promoted to int before shifting, and C# uses only the low
// five bits of an int shift count, so amount >= 32 (or negative) wraps around instead
// of yielding zero - confirm this matches what the accelerated path produces.
// NOTE(review): the declarations of a and b and the return are not visible in this
// excerpt; presumably they point at the lanes of va and res - confirm.
117 [Acceleration (AccelMode.SSE2)]
118 public static unsafe Vector8us operator >> (Vector8us va, int amount)
120 Vector8us res = new Vector8us ();
123 for (int i = 0; i < 8; ++i)
124 *b++ = (ushort)(*a++ >> amount);
// Lane-wise left shift by amount; bits shifted past bit 15 are dropped by the ushort
// cast (in an unchecked context).
// NOTE(review): each lane is promoted to int before shifting, and C# uses only the low
// five bits of an int shift count, so amount >= 32 (or negative) wraps around instead
// of yielding zero - confirm this matches what the accelerated path produces.
// NOTE(review): the declarations of a and b and the return are not visible in this
// excerpt; presumably they point at the lanes of va and res - confirm.
128 [Acceleration (AccelMode.SSE2)]
129 public static unsafe Vector8us operator << (Vector8us va, int amount)
131 Vector8us res = new Vector8us ();
134 for (int i = 0; i < 8; ++i)
135 *b++ = (ushort)(*a++ << amount);
// Bitwise AND of va and vb. The eight ushort lanes (16 bytes) are viewed as two
// 64-bit words, so the whole vector is combined in two operations.
// NOTE(review): the return statement is not visible in this excerpt; presumably res.
139 [Acceleration (AccelMode.SSE2)]
140 public static unsafe Vector8us operator & (Vector8us va, Vector8us vb)
142 Vector8us res = new Vector8us ();
143 ulong *a = (ulong*) &va.v0;
144 ulong *b = (ulong*) &vb.v0;
145 ulong *c = (ulong*) &res.v0;
// Low 64 bits first (pointers advance), then the high 64 bits.
146 *c++ = (ulong)(*a++ & *b++);
147 *c = (ulong)(*a & *b);
// Bitwise OR of va and vb. The eight ushort lanes (16 bytes) are viewed as two
// 64-bit words, so the whole vector is combined in two operations.
// NOTE(review): the return statement is not visible in this excerpt; presumably res.
151 [Acceleration (AccelMode.SSE2)]
152 public static unsafe Vector8us operator | (Vector8us va, Vector8us vb)
154 Vector8us res = new Vector8us ();
155 ulong *a = (ulong*) &va.v0;
156 ulong *b = (ulong*) &vb.v0;
157 ulong *c = (ulong*) &res.v0;
// Low 64 bits first (pointers advance), then the high 64 bits.
158 *c++ = (ulong)(*a++ | *b++);
159 *c = (ulong)(*a | *b);
// Bitwise XOR of va and vb. The eight ushort lanes (16 bytes) are viewed as two
// 64-bit words, so the whole vector is combined in two operations.
// NOTE(review): the return statement is not visible in this excerpt; presumably res.
163 [Acceleration (AccelMode.SSE2)]
164 public static unsafe Vector8us operator ^ (Vector8us va, Vector8us vb)
166 Vector8us res = new Vector8us ();
167 ulong *a = (ulong*) &va.v0;
168 ulong *b = (ulong*) &vb.v0;
169 ulong *c = (ulong*) &res.v0;
// Low 64 bits first (pointers advance), then the high 64 bits.
170 *c++ = (ulong)(*a++ ^ *b++);
171 *c = (ulong)(*a ^ *b);
// Interleaves the low four lanes of va and vb:
// result = (va0, vb0, va1, vb1, va2, vb2, va3, vb3).
175 [Acceleration (AccelMode.SSE2)]
176 public static unsafe Vector8us UnpackLow (Vector8us va, Vector8us vb)
178 return new Vector8us (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3);
// Interleaves the high four lanes of va and vb:
// result = (va4, vb4, va5, vb5, va6, vb6, va7, vb7).
181 [Acceleration (AccelMode.SSE2)]
182 public static unsafe Vector8us UnpackHigh (Vector8us va, Vector8us vb)
184 return new Vector8us (va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
// Lane-wise arithmetic right shift: each lane is reinterpreted as a signed short before
// shifting, so the sign bit is replicated, and the result is cast back to ushort.
// NOTE(review): the shift happens on an int, and C# uses only the low five bits of the
// count, so amount >= 32 (or negative) wraps around - confirm against the accelerated path.
// NOTE(review): the declarations of a and b and the return are not visible in this
// excerpt; presumably they point at the lanes of va and res - confirm.
187 [Acceleration (AccelMode.SSE2)]
188 public static unsafe Vector8us ArithmeticRightShift (Vector8us va, int amount)
190 Vector8us res = new Vector8us ();
193 for (int i = 0; i < 8; ++i)
194 *b++ = (ushort)((short)(*a++) >> amount);
// Lane-wise addition that saturates instead of wrapping: the sum is computed in int
// and clamped to ushort.MaxValue before being stored.
// NOTE(review): the declarations of a, b and c and the return are not visible in this
// excerpt; presumably they point at the lanes of va, vb and res - confirm.
198 [Acceleration (AccelMode.SSE2)]
199 public static unsafe Vector8us AddWithSaturation (Vector8us va, Vector8us vb) {
200 Vector8us res = new Vector8us ();
204 for (int i = 0; i < 8; ++i)
205 *c++ = (ushort) System.Math.Min (*a++ + *b++, ushort.MaxValue);
// Lane-wise subtraction that saturates instead of wrapping: the difference is computed
// in int and clamped to 0 from below before being stored.
// NOTE(review): the declarations of a, b and c and the return are not visible in this
// excerpt; presumably they point at the lanes of va, vb and res - confirm.
209 [Acceleration (AccelMode.SSE2)]
210 public static unsafe Vector8us SubtractWithSaturation (Vector8us va, Vector8us vb) {
211 Vector8us res = new Vector8us ();
215 for (int i = 0; i < 8; ++i)
216 *c++ = (ushort) System.Math.Max (*a++ - *b++, 0);
// Lane-wise average rounded up on ties: (a + b + 1) >> 1. The sum is computed in int,
// so the intermediate value cannot overflow for 16-bit inputs.
// NOTE(review): the declarations of a, b and c and the return are not visible in this
// excerpt; presumably they point at the lanes of va, vb and res - confirm.
220 [Acceleration (AccelMode.SSE2)]
221 public static unsafe Vector8us Average (Vector8us va, Vector8us vb) {
222 Vector8us res = new Vector8us ();
226 for (int i = 0; i < 8; ++i)
227 *c++ = (ushort) ((*a++ + *b++ + 1) >> 1);
// Lane-wise maximum of va and vb. Unlike most members here it is tagged
// AccelMode.SSE41 rather than SSE2.
// NOTE(review): the declarations of a, b and c and the return are not visible in this
// excerpt; presumably they point at the lanes of va, vb and res - confirm.
231 [Acceleration (AccelMode.SSE41)]
232 public static unsafe Vector8us Max (Vector8us va, Vector8us vb) {
233 Vector8us res = new Vector8us ();
237 for (int i = 0; i < 8; ++i)
238 *c++ = (ushort) System.Math.Max (*a++, *b++);
// Lane-wise minimum of va and vb. Unlike most members here it is tagged
// AccelMode.SSE41 rather than SSE2.
// NOTE(review): the declarations of a, b and c and the return are not visible in this
// excerpt; presumably they point at the lanes of va, vb and res - confirm.
242 [Acceleration (AccelMode.SSE41)]
243 public static unsafe Vector8us Min (Vector8us va, Vector8us vb) {
244 Vector8us res = new Vector8us ();
248 for (int i = 0; i < 8; ++i)
249 *c++ = (ushort) System.Math.Min (*a++, *b++);
// Builds an integer mask from the 16 bytes of va: bit i of the result is the most
// significant bit (0x80) of byte i.
// NOTE(review): the declaration of res and the return are not visible in this excerpt;
// presumably res is an int starting at 0 - confirm.
253 [Acceleration (AccelMode.SSE2)]
254 public static unsafe int ExtractByteMask (Vector8us va) {
// Walk the vector byte by byte rather than lane by lane.
256 byte *a = (byte*)&va;
257 for (int i = 0; i < 16; ++i)
// Isolate the top bit, move it down to bit 0, then up to position i.
258 res |= (*a++ & 0x80) >> 7 << i;
// Returns a vector whose lanes 0..3 are copied from va unchanged and whose lanes 4..7
// are each picked from va's lanes 4..7 by successive 2-bit fields of idx:
// bits 0-1 choose the source for lane 4, bits 2-3 for lane 5, bits 4-5 for lane 6,
// bits 6-7 for lane 7.
// NOTE(review): idx is declared on a line not visible in this excerpt; presumably it is
// sel converted to an integer - confirm.
262 [Acceleration (AccelMode.SSE2)]
263 public static unsafe Vector8us ShuffleHigh (Vector8us va, ShuffleSel sel)
// ptr addresses lane 4, the start of the upper half, so selectors 0..3 map to lanes 4..7.
265 ushort *ptr = ((ushort*)&va) + 4;
267 return new Vector8us (va.v0, va.v1, va.v2, va.v3, *(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)));
// Returns a vector whose lanes 4..7 are copied from va unchanged and whose lanes 0..3
// are each picked from va's lanes 0..3 by successive 2-bit fields of idx:
// bits 0-1 choose the source for lane 0, bits 2-3 for lane 1, bits 4-5 for lane 2,
// bits 6-7 for lane 3.
// NOTE(review): idx is declared on a line not visible in this excerpt; presumably it is
// sel converted to an integer - confirm.
270 [Acceleration (AccelMode.SSE2)]
271 public static unsafe Vector8us ShuffleLow (Vector8us va, ShuffleSel sel)
// ptr addresses lane 0, so selectors 0..3 map to lanes 0..3.
273 ushort *ptr = ((ushort*)&va);
275 return new Vector8us (*(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)), va.v4, va.v5, va.v6, va.v7);
// Lane-wise equality test producing a mask: a lane becomes (ushort)-1, i.e. all bits
// set in an unchecked context, when the inputs match, and 0 otherwise.
// NOTE(review): the declarations of a, b and c and the return are not visible in this
// excerpt; presumably they point at the lanes of va, vb and res - confirm.
278 [Acceleration (AccelMode.SSE2)]
279 public static unsafe Vector8us CompareEqual (Vector8us va, Vector8us vb) {
280 Vector8us res = new Vector8us ();
284 for (int i = 0; i < 8; ++i)
285 *c++ = (ushort) (*a++ == *b++ ? -1 : 0);
// Lane-wise multiplication that keeps the upper half of each product: the lanes are
// widened to uint, multiplied, and bits 16..31 of the 32-bit product are stored.
// NOTE(review): the declarations of a, b and c and the return are not visible in this
// excerpt; presumably they point at the lanes of va, vb and res - confirm.
289 [Acceleration (AccelMode.SSE2)]
290 public static unsafe Vector8us MultiplyStoreHigh (Vector8us va, Vector8us vb) {
291 Vector8us res = new Vector8us ();
295 for (int i = 0; i < 8; ++i)
// Multiplication binds tighter than >>, so the shift applies to the full product.
296 *c++ = (ushort)((uint)*a++ * (uint)*b++ >> 16);
// Narrows two vectors of 16-bit lanes into one vector of 16 bytes. Each lane is read as
// a signed short and clamped to the range 0..255; bytes 0..7 come from va and bytes
// 8..15 from vb.
// NOTE(review): the return statement is not visible in this excerpt; presumably res.
300 /*This function performs a packuswb, which treats the source as a signed value */
301 [Acceleration (AccelMode.SSE2)]
302 public static unsafe Vector16b SignedPackWithUnsignedSaturation (Vector8us va, Vector8us vb) {
303 Vector16b res = new Vector16b ();
// Reinterpret the unsigned lanes as signed so values >= 0x8000 count as negative.
304 short *a = (short*)&va;
305 short *b = (short*)&vb;
306 byte *c = (byte*)&res;
// c keeps advancing across both loops, so the second loop fills the upper eight bytes.
307 for (int i = 0; i < 8; ++i)
308 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*a++, byte.MaxValue));
309 for (int i = 0; i < 8; ++i)
310 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*b++, byte.MaxValue));
// Narrows two vectors of 16-bit lanes into one vector of 16 signed bytes. Each lane is
// read as a signed short and clamped to sbyte.MinValue..sbyte.MaxValue; bytes 0..7 come
// from va and bytes 8..15 from vb.
// NOTE(review): the return statement is not visible in this excerpt; presumably res.
314 /*This function performs a packsswb, which treats the source as a signed value */
315 [Acceleration (AccelMode.SSE2)]
316 public static unsafe Vector16sb SignedPackWithSignedSaturation (Vector8us va, Vector8us vb) {
317 Vector16sb res = new Vector16sb ();
// Reinterpret the unsigned lanes as signed so values >= 0x8000 count as negative.
318 short *a = (short*)&va;
319 short *b = (short*)&vb;
320 sbyte *c = (sbyte*)&res;
// c keeps advancing across both loops, so the second loop fills the upper eight bytes.
321 for (int i = 0; i < 8; ++i)
322 *c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*a++, sbyte.MaxValue), sbyte.MinValue);
323 for (int i = 0; i < 8; ++i)
324 *c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*b++, sbyte.MaxValue), sbyte.MinValue);
// Explicit conversion to Vector2d. The visible line reinterprets the address of v as a
// Vector2d pointer, which suggests the bits are reused as-is rather than converted per lane.
// NOTE(review): the return statement is not visible in this excerpt; presumably it
// dereferences p - confirm.
328 [Acceleration (AccelMode.SSE1)]
329 public static unsafe explicit operator Vector2d (Vector8us v)
331 Vector2d* p = (Vector2d*)&v;
// Explicit conversion to Vector4f. The visible line reinterprets the address of v as a
// Vector4f pointer, which suggests the bits are reused as-is rather than converted per lane.
// NOTE(review): the return statement is not visible in this excerpt; presumably it
// dereferences p - confirm.
335 [Acceleration (AccelMode.SSE1)]
336 public static unsafe explicit operator Vector4f (Vector8us v)
338 Vector4f* p = (Vector4f*)&v;
// Explicit conversion to Vector2l. The visible line reinterprets the address of v as a
// Vector2l pointer, which suggests the bits are reused as-is rather than converted per lane.
// NOTE(review): the return statement is not visible in this excerpt; presumably it
// dereferences p - confirm.
342 [Acceleration (AccelMode.SSE1)]
343 public static unsafe explicit operator Vector2l (Vector8us v)
345 Vector2l* p = (Vector2l*)&v;
// Explicit conversion to Vector2ul. The visible line reinterprets the address of v as a
// Vector2ul pointer, which suggests the bits are reused as-is rather than converted per lane.
// NOTE(review): the return statement is not visible in this excerpt; presumably it
// dereferences p - confirm.
349 [Acceleration (AccelMode.SSE1)]
350 public static unsafe explicit operator Vector2ul (Vector8us v)
352 Vector2ul* p = (Vector2ul*)&v;
// Explicit conversion to Vector4i. The visible line reinterprets the address of v as a
// Vector4i pointer, which suggests the bits are reused as-is rather than converted per lane.
// NOTE(review): the return statement is not visible in this excerpt; presumably it
// dereferences p - confirm.
356 [Acceleration (AccelMode.SSE1)]
357 public static unsafe explicit operator Vector4i (Vector8us v)
359 Vector4i* p = (Vector4i*)&v;
// Explicit conversion to Vector4ui. The visible line reinterprets the address of v as a
// Vector4ui pointer, which suggests the bits are reused as-is rather than converted per lane.
// NOTE(review): the return statement is not visible in this excerpt; presumably it
// dereferences p - confirm.
363 [Acceleration (AccelMode.SSE1)]
364 public static unsafe explicit operator Vector4ui (Vector8us v)
366 Vector4ui* p = (Vector4ui*)&v;
// Explicit conversion to Vector8s. The visible line reinterprets the address of v as a
// Vector8s pointer, which suggests the bits are reused as-is rather than converted per lane.
// NOTE(review): the return statement is not visible in this excerpt; presumably it
// dereferences p - confirm.
370 [Acceleration (AccelMode.SSE1)]
371 public static unsafe explicit operator Vector8s (Vector8us v)
373 Vector8s* p = (Vector8s*)&v;
// Explicit conversion to Vector16sb. The visible line reinterprets the address of v as a
// Vector16sb pointer, which suggests the bits are reused as-is rather than converted per lane.
// NOTE(review): the return statement is not visible in this excerpt; presumably it
// dereferences p - confirm.
377 [Acceleration (AccelMode.SSE1)]
378 public static unsafe explicit operator Vector16sb (Vector8us v)
380 Vector16sb* p = (Vector16sb*)&v;
// Explicit conversion to Vector16b. The visible line reinterprets the address of v as a
// Vector16b pointer, which suggests the bits are reused as-is rather than converted per lane.
// NOTE(review): the return statement is not visible in this excerpt; presumably it
// dereferences p - confirm.
384 [Acceleration (AccelMode.SSE1)]
385 public static unsafe explicit operator Vector16b (Vector8us v)
387 Vector16b* p = (Vector16b*)&v;
// Takes a Vector8us by reference and returns a Vector8us.
// NOTE(review): only the signature is visible in this excerpt; presumably the body
// returns the referenced value and the name implies an alignment requirement - confirm.
392 [Acceleration (AccelMode.SSE1)]
393 public static Vector8us LoadAligned (ref Vector8us v)
// Takes a destination by reference plus a value to store; returns nothing.
// NOTE(review): only the signature is visible in this excerpt; presumably the body
// assigns val to res and the name implies an alignment requirement - confirm.
398 [Acceleration (AccelMode.SSE1)]
399 public static void StoreAligned (ref Vector8us res, Vector8us val)
// Pointer overload: takes a Vector8us pointer and returns a Vector8us.
// NOTE(review): only the signature is visible in this excerpt; presumably the body
// dereferences v and the name implies an alignment requirement - confirm.
404 [Acceleration (AccelMode.SSE1)]
405 public static unsafe Vector8us LoadAligned (Vector8us *v)
// Pointer overload: takes a destination pointer plus a value to store; returns nothing.
// NOTE(review): only the signature is visible in this excerpt; presumably the body
// writes val through res and the name implies an alignment requirement - confirm.
410 [Acceleration (AccelMode.SSE1)]
411 public static unsafe void StoreAligned (Vector8us *res, Vector8us val)
// Takes a Vector8us by reference and returns nothing; marked as not CLS-compliant.
// NOTE(review): only the signature is visible in this excerpt; presumably a cache
// prefetch hint for the referenced memory, as the name suggests - confirm.
416 [Acceleration (AccelMode.SSE1)]
417 [CLSCompliant(false)]
418 public static void PrefetchTemporalAllCacheLevels (ref Vector8us res)
// Takes a Vector8us by reference and returns nothing; marked as not CLS-compliant.
// NOTE(review): only the signature is visible in this excerpt; presumably a cache
// prefetch hint for the referenced memory, as the name suggests - confirm.
422 [Acceleration (AccelMode.SSE1)]
423 [CLSCompliant(false)]
424 public static void PrefetchTemporal1stLevelCache (ref Vector8us res)
// Takes a Vector8us by reference and returns nothing; marked as not CLS-compliant.
// NOTE(review): only the signature is visible in this excerpt; presumably a cache
// prefetch hint for the referenced memory, as the name suggests - confirm.
428 [Acceleration (AccelMode.SSE1)]
429 [CLSCompliant(false)]
430 public static void PrefetchTemporal2ndLevelCache (ref Vector8us res)
// Takes a Vector8us by reference and returns nothing; marked as not CLS-compliant.
// NOTE(review): only the signature is visible in this excerpt; presumably a cache
// prefetch hint for the referenced memory, as the name suggests - confirm.
434 [Acceleration (AccelMode.SSE1)]
435 [CLSCompliant(false)]
436 public static void PrefetchNonTemporal (ref Vector8us res)
// Pointer overload: takes a Vector8us pointer and returns nothing; marked as not
// CLS-compliant.
// NOTE(review): only the signature is visible in this excerpt; presumably a cache
// prefetch hint for the pointed-to memory, as the name suggests - confirm.
440 [Acceleration (AccelMode.SSE1)]
441 [CLSCompliant(false)]
442 public static unsafe void PrefetchTemporalAllCacheLevels (Vector8us *res)
// Pointer overload: takes a Vector8us pointer and returns nothing; marked as not
// CLS-compliant.
// NOTE(review): only the signature is visible in this excerpt; presumably a cache
// prefetch hint for the pointed-to memory, as the name suggests - confirm.
446 [Acceleration (AccelMode.SSE1)]
447 [CLSCompliant(false)]
448 public static unsafe void PrefetchTemporal1stLevelCache (Vector8us *res)
// Pointer overload: takes a Vector8us pointer and returns nothing; marked as not
// CLS-compliant.
// NOTE(review): only the signature is visible in this excerpt; presumably a cache
// prefetch hint for the pointed-to memory, as the name suggests - confirm.
452 [Acceleration (AccelMode.SSE1)]
453 [CLSCompliant(false)]
454 public static unsafe void PrefetchTemporal2ndLevelCache (Vector8us *res)
// Pointer overload: takes a Vector8us pointer and returns nothing; marked as not
// CLS-compliant.
// NOTE(review): only the signature is visible in this excerpt; presumably a cache
// prefetch hint for the pointed-to memory, as the name suggests - confirm.
458 [Acceleration (AccelMode.SSE1)]
459 [CLSCompliant(false)]
460 public static unsafe void PrefetchNonTemporal (Vector8us *res)