4 // Rodrigo Kumpera (rkumpera@novell.com)
6 // (C) 2008 Novell, Inc. (http://www.novell.com)
8 // Permission is hereby granted, free of charge, to any person obtaining
9 // a copy of this software and associated documentation files (the
10 // "Software"), to deal in the Software without restriction, including
11 // without limitation the rights to use, copy, modify, merge, publish,
12 // distribute, sublicense, and/or sell copies of the Software, and to
13 // permit persons to whom the Software is furnished to do so, subject to
14 // the following conditions:
16 // The above copyright notice and this permission notice shall be
17 // included in all copies or substantial portions of the Software.
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 using System.Runtime.InteropServices;
\r
// Value type declared with sequential layout and an explicit size of 16 bytes,
// holding eight ushort lanes (v0..v7). It is flagged as not CLS-compliant.
32 [StructLayout(LayoutKind.Sequential, Pack = 0, Size = 16)]
33 [CLSCompliant(false)]
\r
34 public struct Vector8us
\r
// Backing storage: one private ushort field per lane, declared in lane order.
36 private ushort v0, v1, v2, v3, v4, v5, v6, v7;
\r\r
// Constructor taking one value per lane. Its body is not visible in this excerpt;
// presumably each argument is stored in the field of the same name -- TODO confirm.
37 public Vector8us (ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
\r
// Lane accessors: each property VN returns the private field vN from its getter
// and assigns the supplied value to that same field from its setter.
49 public ushort V0 { get { return v0; } set { v0 = value; } }
50 public ushort V1 { get { return v1; } set { v1 = value; } }
51 public ushort V2 { get { return v2; } set { v2 = value; } }
52 public ushort V3 { get { return v3; } set { v3 = value; } }
53 public ushort V4 { get { return v4; } set { v4 = value; } }
54 public ushort V5 { get { return v5; } set { v5 = value; } }
55 public ushort V6 { get { return v6; } set { v6 = value; } }
56 public ushort V7 { get { return v7; } set { v7 = value; } }
// Addition operator over two Vector8us values, tagged for SSE2 acceleration.
// The visible fallback runs eight iterations, each storing the sum of *a and *b,
// cast back to ushort, through c, then advancing all three pointers.
// NOTE(review): the declarations of a, b and c and the return statement are not
// visible here; presumably a/b walk the lanes of va/vb and c walks res -- confirm.
58 [Acceleration (AccelMode.SSE2)]
59 public static unsafe Vector8us operator + (Vector8us va, Vector8us vb)
\r
61 Vector8us res = new Vector8us ();
65 for (int i = 0; i < 8; ++i)
// The operands are promoted to int for the addition; the cast narrows the sum to ushort.
66 *c++ = (ushort)(*a++ + *b++);
// Subtraction operator over two Vector8us values, tagged for SSE2 acceleration.
// The visible fallback runs eight iterations, each storing *a minus *b, cast back
// to ushort, through c, then advancing all three pointers.
// NOTE(review): the declarations of a, b and c and the return statement are not
// visible here; presumably a/b walk the lanes of va/vb and c walks res -- confirm.
70 [Acceleration (AccelMode.SSE2)]
71 public static unsafe Vector8us operator - (Vector8us va, Vector8us vb)
\r
73 Vector8us res = new Vector8us ();
77 for (int i = 0; i < 8; ++i)
// The difference is computed as int and then narrowed to ushort by the cast.
78 *c++ = (ushort)(*a++ - *b++);
83 * NOTE: Though pmullw states it does signed multiplication, it works for unsigned numbers
84 * if only the lower part is considered and the flags disregarded.
// Multiplication operator over two Vector8us values, tagged for SSE2 acceleration.
// The visible fallback runs eight iterations, each storing the product of *a and
// *b, narrowed to ushort by the cast, through c.
// NOTE(review): the declarations of a, b and c and the return statement are not
// visible here; presumably a/b walk the lanes of va/vb and c walks res -- confirm.
86 [Acceleration (AccelMode.SSE2)]
87 public static unsafe Vector8us operator * (Vector8us va, Vector8us vb)
\r
89 Vector8us res = new Vector8us ();
93 for (int i = 0; i < 8; ++i)
// Only the part of the product that fits in a ushort survives the cast.
94 *c++ = (ushort)(*a++ * (*b++));
// Right-shift operator taking a Vector8us and an int shift count, tagged for SSE2.
// The visible fallback runs eight iterations, each storing *a shifted right by
// amount, cast to ushort, through b.
// NOTE(review): the declarations of a and b and the return statement are not
// visible here; presumably a walks the lanes of va and b walks res -- confirm.
// NOTE(review): the shift is performed on the int-promoted value, so C# uses only
// the low five bits of amount; whether that matches the accelerated path for
// counts above 15 is not established here -- confirm.
98 [Acceleration (AccelMode.SSE2)]
99 public static unsafe Vector8us operator >> (Vector8us va, int amount)
\r
101 Vector8us res = new Vector8us ();
104 for (int i = 0; i < 8; ++i)
105 *b++ = (ushort)(*a++ >> amount);
// Left-shift operator taking a Vector8us and an int shift count, tagged for SSE2.
// The visible fallback runs eight iterations, each storing *a shifted left by
// amount, narrowed to ushort by the cast, through b.
// NOTE(review): the declarations of a and b and the return statement are not
// visible here; presumably a walks the lanes of va and b walks res -- confirm.
// NOTE(review): the shift is performed on the int-promoted value, so C# uses only
// the low five bits of amount; whether that matches the accelerated path for
// counts above 15 is not established here -- confirm.
109 [Acceleration (AccelMode.SSE2)]
110 public static unsafe Vector8us operator << (Vector8us va, int amount)
\r
112 Vector8us res = new Vector8us ();
115 for (int i = 0; i < 8; ++i)
116 *b++ = (ushort)(*a++ << amount);
// Bitwise AND operator over two Vector8us values, tagged for SSE2 acceleration.
// The visible fallback runs eight iterations, each storing (*a & *b) as a ushort
// through c and advancing all three pointers.
// NOTE(review): the declarations of a, b and c and the return statement are not
// visible here; presumably a/b walk the lanes of va/vb and c walks res -- confirm.
120 [Acceleration (AccelMode.SSE2)]
121 public static unsafe Vector8us operator & (Vector8us va, Vector8us vb)
\r
123 Vector8us res = new Vector8us ();
127 for (int i = 0; i < 8; ++i)
128 *c++ = (ushort)(*a++ & *b++);
// Bitwise OR operator over two Vector8us values, tagged for SSE2 acceleration.
// The visible fallback runs eight iterations, each storing (*a | *b) as a ushort
// through c and advancing all three pointers.
// NOTE(review): the declarations of a, b and c and the return statement are not
// visible here; presumably a/b walk the lanes of va/vb and c walks res -- confirm.
132 [Acceleration (AccelMode.SSE2)]
133 public static unsafe Vector8us operator | (Vector8us va, Vector8us vb)
\r
135 Vector8us res = new Vector8us ();
139 for (int i = 0; i < 8; ++i)
140 *c++ = (ushort)(*a++ | *b++);
// Bitwise XOR operator over two Vector8us values, tagged for SSE2 acceleration.
// The visible fallback runs eight iterations, each storing (*a ^ *b) as a ushort
// through c and advancing all three pointers.
// NOTE(review): the declarations of a, b and c and the return statement are not
// visible here; presumably a/b walk the lanes of va/vb and c walks res -- confirm.
144 [Acceleration (AccelMode.SSE2)]
145 public static unsafe Vector8us operator ^ (Vector8us va, Vector8us vb)
\r
147 Vector8us res = new Vector8us ();
151 for (int i = 0; i < 8; ++i)
152 *c++ = (ushort)(*a++ ^ *b++);
// Interleaves the four low lanes (v0..v3) of the two inputs, tagged for SSE2.
// The result lanes are, in order: va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2,
// va.v3, vb.v3. Lanes v4..v7 of both inputs are not used.
156 [Acceleration (AccelMode.SSE2)]
157 public static unsafe Vector8us UnpackLow (Vector8us va, Vector8us vb)
159 return new Vector8us (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3);
// Interleaves the four high lanes (v4..v7) of the two inputs, tagged for SSE2.
// The result lanes are, in order: va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6,
// va.v7, vb.v7. Lanes v0..v3 of both inputs are not used.
162 [Acceleration (AccelMode.SSE2)]
163 public static unsafe Vector8us UnpackHigh (Vector8us va, Vector8us vb)
165 return new Vector8us (va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
// Arithmetic (sign-propagating) right shift, tagged for SSE2 acceleration.
// The visible fallback runs eight iterations; each value read through a is first
// cast to short so the shift replicates the sign bit, then the shifted value is
// cast back to ushort and stored through b.
// NOTE(review): the declarations of a and b and the return statement are not
// visible here; presumably a walks the lanes of va and b walks res -- confirm.
168 [Acceleration (AccelMode.SSE2)]
169 public static unsafe Vector8us ShiftRightArithmetic (Vector8us va, int amount)
\r
171 Vector8us res = new Vector8us ();
174 for (int i = 0; i < 8; ++i)
175 *b++ = (ushort)((short)(*a++) >> amount);
// Saturating addition, tagged for SSE2 acceleration.
// The visible fallback runs eight iterations; each sum is computed as an int and
// capped at ushort.MaxValue with Math.Min before being stored through c, so a sum
// that exceeds the ushort range yields ushort.MaxValue.
// NOTE(review): the declarations of a, b and c and the return statement are not
// visible here; presumably a/b walk the lanes of va/vb and c walks res -- confirm.
179 [Acceleration (AccelMode.SSE2)]
180 public static unsafe Vector8us AddWithSaturation (Vector8us va, Vector8us vb) {
181 Vector8us res = new Vector8us ();
185 for (int i = 0; i < 8; ++i)
186 *c++ = (ushort) System.Math.Min (*a++ + *b++, ushort.MaxValue);
// Saturating subtraction, tagged for SSE2 acceleration.
// The visible fallback runs eight iterations; each difference is computed as an
// int and floored at 0 with Math.Max before being stored through c, so a negative
// difference yields 0.
// NOTE(review): the declarations of a, b and c and the return statement are not
// visible here; presumably a/b walk the lanes of va/vb and c walks res -- confirm.
190 [Acceleration (AccelMode.SSE2)]
191 public static unsafe Vector8us SubWithSaturation (Vector8us va, Vector8us vb) {
192 Vector8us res = new Vector8us ();
196 for (int i = 0; i < 8; ++i)
197 *c++ = (ushort) System.Math.Max (*a++ - *b++, 0);
// Rounded average, tagged for SSE2 acceleration.
// The visible fallback runs eight iterations; each stores (*a + *b + 1) >> 1, so
// an odd sum rounds up. The sum is computed as an int before the shift.
// NOTE(review): the declarations of a, b and c and the return statement are not
// visible here; presumably a/b walk the lanes of va/vb and c walks res -- confirm.
201 [Acceleration (AccelMode.SSE2)]
202 public static unsafe Vector8us Average (Vector8us va, Vector8us vb) {
203 Vector8us res = new Vector8us ();
207 for (int i = 0; i < 8; ++i)
208 *c++ = (ushort) ((*a++ + *b++ + 1) >> 1);
// Maximum of paired values. Unlike most members here this one is tagged
// AccelMode.SSE41 rather than SSE2.
// The visible fallback runs eight iterations, each storing Math.Max (*a, *b)
// through c and advancing all three pointers.
// NOTE(review): the declarations of a, b and c and the return statement are not
// visible here; presumably a/b walk the lanes of va/vb and c walks res -- confirm.
212 [Acceleration (AccelMode.SSE41)]
213 public static unsafe Vector8us Max (Vector8us va, Vector8us vb) {
214 Vector8us res = new Vector8us ();
218 for (int i = 0; i < 8; ++i)
219 *c++ = (ushort) System.Math.Max (*a++, *b++);
// Minimum of paired values. Unlike most members here this one is tagged
// AccelMode.SSE41 rather than SSE2.
// The visible fallback runs eight iterations, each storing Math.Min (*a, *b)
// through c and advancing all three pointers.
// NOTE(review): the declarations of a, b and c and the return statement are not
// visible here; presumably a/b walk the lanes of va/vb and c walks res -- confirm.
223 [Acceleration (AccelMode.SSE41)]
224 public static unsafe Vector8us Min (Vector8us va, Vector8us vb) {
225 Vector8us res = new Vector8us ();
229 for (int i = 0; i < 8; ++i)
230 *c++ = (ushort) System.Math.Min (*a++, *b++);
// Collects the most significant bit of each of the 16 bytes of va into an int,
// tagged for SSE2 acceleration.
// NOTE(review): the declaration of res and the return statement are not visible
// here; presumably res starts at 0 and is returned -- confirm.
234 [Acceleration (AccelMode.SSE2)]
235 public static unsafe int ExtractByteMask (Vector8us va) {
// View the parameter copy as raw bytes.
237 byte *a = (byte*)&va;
238 for (int i = 0; i < 16; ++i)
// (byte & 0x80) >> 7 is 0 or 1 (the byte's top bit); it is then moved to bit i of res.
239 res |= (*a++ & 0x80) >> 7 << i;
// Rearranges the four high lanes of va according to sel, tagged for SSE2.
// Lanes v0..v3 are copied unchanged. Each of the four high result lanes is read
// from ptr + k, where k is a 2-bit field of the selector: bits 0-1 pick the first
// high lane, bits 2-3 the second, bits 4-5 the third and bits 6-7 the fourth.
243 [Acceleration (AccelMode.SSE2)]
244 public static unsafe Vector8us ShuffleHigh (Vector8us va, ShuffleSel sel)
\r
// ptr addresses the fifth ushort of the parameter copy (offset 4 from its start).
246 ushort *ptr = ((ushort*)&va) + 4;
247 int idx = (int)sel;
\r
248 return new Vector8us (va.v0, va.v1, va.v2, va.v3, *(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)));
\r
// Rearranges the four low lanes of va according to sel, tagged for SSE2.
// Each of the four low result lanes is read from ptr + k, where k is a 2-bit field
// of the selector: bits 0-1 pick the first lane, bits 2-3 the second, bits 4-5 the
// third and bits 6-7 the fourth. Lanes v4..v7 are copied unchanged.
251 [Acceleration (AccelMode.SSE2)]
252 public static unsafe Vector8us ShuffleLow (Vector8us va, ShuffleSel sel)
\r
// ptr addresses the first ushort of the parameter copy.
254 ushort *ptr = ((ushort*)&va);
255 int idx = (int)sel;
\r
256 return new Vector8us (*(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)), va.v4, va.v5, va.v6, va.v7);
\r
// Equality comparison producing a mask, tagged for SSE2 acceleration.
// The visible fallback runs eight iterations; each stores -1 cast to ushort when
// *a equals *b and 0 otherwise.
// NOTE(review): in an unchecked context (ushort)(-1) is 0xFFFF, i.e. all bits set;
// a checked build would behave differently -- confirm the compilation settings.
// NOTE(review): the declarations of a, b and c and the return statement are not
// visible here; presumably a/b walk the lanes of va/vb and c walks res -- confirm.
259 [Acceleration (AccelMode.SSE2)]
260 public static unsafe Vector8us CompareEqual (Vector8us va, Vector8us vb) {
261 Vector8us res = new Vector8us ();
265 for (int i = 0; i < 8; ++i)
266 *c++ = (ushort) (*a++ == *b++ ? -1 : 0);
// Multiplication keeping the upper half of each product, tagged for SSE2.
// The visible fallback runs eight iterations; both operands are widened to uint,
// multiplied, and the product is shifted right by 16 before the ushort cast, so
// each stored value is bits 16-31 of the 32-bit product.
// NOTE(review): the declarations of a, b and c and the return statement are not
// visible here; presumably a/b walk the lanes of va/vb and c walks res -- confirm.
270 [Acceleration (AccelMode.SSE2)]
271 public static unsafe Vector8us MultiplyStoreHigh (Vector8us va, Vector8us vb) {
272 Vector8us res = new Vector8us ();
276 for (int i = 0; i < 8; ++i)
// Multiplication binds tighter than >>, so the shift applies to the full product.
277 *c++ = (ushort)((uint)*a++ * (uint)*b++ >> 16);
281 /*This function performs a packuswb, which treats the source as a signed value */
// Narrows two Vector8us inputs into one Vector16b, tagged for SSE2 acceleration.
// Both inputs are read as signed 16-bit values; each value is clamped to the range
// 0..byte.MaxValue and written as one byte of the result.
// NOTE(review): the return statement is not visible here; presumably res is
// returned -- confirm.
282 [Acceleration (AccelMode.SSE2)]
283 public static unsafe Vector16b SignedPackWithUnsignedSaturation (Vector8us va, Vector8us vb) {
284 Vector16b res = new Vector16b ();
// Reinterpret the parameter copies as short so values above short.MaxValue read as negative.
285 short *a = (short*)&va;
286 short *b = (short*)&vb;
287 byte *c = (byte*)&res;
// The first eight result bytes come from va.
288 for (int i = 0; i < 8; ++i)
289 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*a++, byte.MaxValue));
// The next eight result bytes come from vb; c keeps advancing from where it stopped.
290 for (int i = 0; i < 8; ++i)
291 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*b++, byte.MaxValue));
295 /*This function performs a packsswb, which treats the source as a signed value */
// Narrows two Vector8us inputs into one Vector16sb, tagged for SSE2 acceleration.
// Both inputs are read as signed 16-bit values; each value is clamped to the range
// sbyte.MinValue..sbyte.MaxValue and written as one signed byte of the result.
// NOTE(review): the return statement is not visible here; presumably res is
// returned -- confirm.
296 [Acceleration (AccelMode.SSE2)]
297 public static unsafe Vector16sb SignedPackWithSignedSaturation (Vector8us va, Vector8us vb) {
298 Vector16sb res = new Vector16sb ();
// Reinterpret the parameter copies as short so values above short.MaxValue read as negative.
299 short *a = (short*)&va;
300 short *b = (short*)&vb;
301 sbyte *c = (sbyte*)&res;
// The first eight result bytes come from va.
302 for (int i = 0; i < 8; ++i)
303 *c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*a++, sbyte.MaxValue), sbyte.MinValue);
// The next eight result bytes come from vb; c keeps advancing from where it stopped.
304 for (int i = 0; i < 8; ++i)
305 *c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*b++, sbyte.MaxValue), sbyte.MinValue);
// Explicit conversion from Vector8us to Vector2d, tagged AccelMode.SSE1.
// The visible line takes the address of the parameter copy v and views it through
// a Vector2d pointer.
// NOTE(review): the statement that uses p is not visible here; presumably *p is
// returned, i.e. a bit-for-bit reinterpretation with no numeric conversion -- confirm.
309 [Acceleration (AccelMode.SSE1)]
310 public static unsafe explicit operator Vector2d (Vector8us v)
\r
312 Vector2d* p = (Vector2d*)&v;
\r
// Explicit conversion from Vector8us to Vector4f, tagged AccelMode.SSE1.
// The visible line takes the address of the parameter copy v and views it through
// a Vector4f pointer.
// NOTE(review): the statement that uses p is not visible here; presumably *p is
// returned, i.e. a bit-for-bit reinterpretation with no numeric conversion -- confirm.
316 [Acceleration (AccelMode.SSE1)]
317 public static unsafe explicit operator Vector4f (Vector8us v)
\r
319 Vector4f* p = (Vector4f*)&v;
\r
// Explicit conversion from Vector8us to Vector2l, tagged AccelMode.SSE1.
// The visible line takes the address of the parameter copy v and views it through
// a Vector2l pointer.
// NOTE(review): the statement that uses p is not visible here; presumably *p is
// returned, i.e. a bit-for-bit reinterpretation with no numeric conversion -- confirm.
323 [Acceleration (AccelMode.SSE1)]
324 public static unsafe explicit operator Vector2l (Vector8us v)
\r
326 Vector2l* p = (Vector2l*)&v;
\r
// Explicit conversion from Vector8us to Vector2ul, tagged AccelMode.SSE1.
// The visible line takes the address of the parameter copy v and views it through
// a Vector2ul pointer.
// NOTE(review): the statement that uses p is not visible here; presumably *p is
// returned, i.e. a bit-for-bit reinterpretation with no numeric conversion -- confirm.
330 [Acceleration (AccelMode.SSE1)]
331 public static unsafe explicit operator Vector2ul (Vector8us v)
\r
333 Vector2ul* p = (Vector2ul*)&v;
\r
// Explicit conversion from Vector8us to Vector4i, tagged AccelMode.SSE1.
// The visible line takes the address of the parameter copy v and views it through
// a Vector4i pointer.
// NOTE(review): the statement that uses p is not visible here; presumably *p is
// returned, i.e. a bit-for-bit reinterpretation with no numeric conversion -- confirm.
337 [Acceleration (AccelMode.SSE1)]
338 public static unsafe explicit operator Vector4i (Vector8us v)
\r
340 Vector4i* p = (Vector4i*)&v;
\r
// Explicit conversion from Vector8us to Vector4ui, tagged AccelMode.SSE1.
// The visible line takes the address of the parameter copy v and views it through
// a Vector4ui pointer.
// NOTE(review): the statement that uses p is not visible here; presumably *p is
// returned, i.e. a bit-for-bit reinterpretation with no numeric conversion -- confirm.
344 [Acceleration (AccelMode.SSE1)]
345 public static unsafe explicit operator Vector4ui (Vector8us v)
\r
347 Vector4ui* p = (Vector4ui*)&v;
\r
// Explicit conversion from Vector8us to Vector8s, tagged AccelMode.SSE1.
// The visible line takes the address of the parameter copy v and views it through
// a Vector8s pointer.
// NOTE(review): the statement that uses p is not visible here; presumably *p is
// returned, i.e. a bit-for-bit reinterpretation with no numeric conversion -- confirm.
351 [Acceleration (AccelMode.SSE1)]
352 public static unsafe explicit operator Vector8s (Vector8us v)
\r
354 Vector8s* p = (Vector8s*)&v;
\r
// Explicit conversion from Vector8us to Vector16sb, tagged AccelMode.SSE1.
// The visible line takes the address of the parameter copy v and views it through
// a Vector16sb pointer.
// NOTE(review): the statement that uses p is not visible here; presumably *p is
// returned, i.e. a bit-for-bit reinterpretation with no numeric conversion -- confirm.
358 [Acceleration (AccelMode.SSE1)]
359 public static unsafe explicit operator Vector16sb (Vector8us v)
\r
361 Vector16sb* p = (Vector16sb*)&v;
\r
// Explicit conversion from Vector8us to Vector16b, tagged AccelMode.SSE1.
// The visible line takes the address of the parameter copy v and views it through
// a Vector16b pointer.
// NOTE(review): the statement that uses p is not visible here; presumably *p is
// returned, i.e. a bit-for-bit reinterpretation with no numeric conversion -- confirm.
365 [Acceleration (AccelMode.SSE1)]
366 public static unsafe explicit operator Vector16b (Vector8us v)
\r
368 Vector16b* p = (Vector16b*)&v;
\r
// LoadAligned overload that receives its source by reference, tagged AccelMode.SSE1.
// NOTE(review): the body is not visible in this excerpt; presumably it returns the
// value of v, and the name suggests the caller must supply aligned storage -- confirm.
373 [Acceleration (AccelMode.SSE1)]
374 public static Vector8us LoadAligned (ref Vector8us v)
\r
// StoreAligned overload that receives its destination by reference, tagged AccelMode.SSE1.
// NOTE(review): the body is not visible in this excerpt; presumably it assigns val
// to res, and the name suggests res must be aligned storage -- confirm.
379 [Acceleration (AccelMode.SSE1)]
380 public static void StoreAligned (ref Vector8us res, Vector8us val)
\r
// LoadAligned overload that receives its source as a raw pointer, tagged AccelMode.SSE1.
// NOTE(review): the body is not visible in this excerpt; presumably it returns *v,
// and the name suggests v must be an aligned address -- confirm.
385 [Acceleration (AccelMode.SSE1)]
386 public static unsafe Vector8us LoadAligned (Vector8us *v)
\r
// StoreAligned overload that receives its destination as a raw pointer, tagged AccelMode.SSE1.
// NOTE(review): the body is not visible in this excerpt; presumably it writes val
// to *res, and the name suggests res must be an aligned address -- confirm.
391 [Acceleration (AccelMode.SSE1)]
392 public static unsafe void StoreAligned (Vector8us *res, Vector8us val)
\r