4 // Rodrigo Kumpera (rkumpera@novell.com)
6 // (C) 2008 Novell, Inc. (http://www.novell.com)
8 // Permission is hereby granted, free of charge, to any person obtaining
9 // a copy of this software and associated documentation files (the
10 // "Software"), to deal in the Software without restriction, including
11 // without limitation the rights to use, copy, modify, merge, publish,
12 // distribute, sublicense, and/or sell copies of the Software, and to
13 // permit persons to whom the Software is furnished to do so, subject to
14 // the following conditions:
16 // The above copyright notice and this permission notice shall be
17 // included in all copies or substantial portions of the Software.
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 using System.Runtime.InteropServices;
/*
Named selectors accepted by Vector4f.Shuffle. Every composite value below ORs
together one flag per destination component (X, Y, Z and W); going by the
existing (x,y,z,w) -> (...) notes, a flag named AFromB means destination
component A is read from source component B.
*/
33 public enum ShuffleSel
55 /*Expand a single element into all elements*/
56 ExpandX = XFromX | YFromX | ZFromX | WFromX,
57 ExpandY = XFromY | YFromY | ZFromY | WFromY,
58 ExpandZ = XFromZ | YFromZ | ZFromZ | WFromZ,
59 ExpandW = XFromW | YFromW | ZFromW | WFromW,
61 /*Expand a pair of elements: ExpandXY gives (x,y,z,w) -> (x,x,y,y), ExpandZW gives (x,y,z,w) -> (z,z,w,w)*/
62 ExpandXY = XFromX | YFromX | ZFromY | WFromY,
63 ExpandZW = XFromZ | YFromZ | ZFromW | WFromW,
65 /*Expand interleaving elements: ExpandInterleavedXY gives (x,y,z,w) -> (x,y,x,y), ExpandInterleavedZW gives (x,y,z,w) -> (z,w,z,w)*/
66 ExpandInterleavedXY = XFromX | YFromY | ZFromX | WFromY,
67 ExpandInterleavedZW = XFromZ | YFromW | ZFromZ | WFromW,
/*Rotate elements: RotateRight gives (x,y,z,w) -> (y,z,w,x), RotateLeft gives (x,y,z,w) -> (w,x,y,z)*/
70 RotateRight = XFromY | YFromZ | ZFromW | WFromX,
71 RotateLeft = XFromW | YFromX | ZFromY | WFromZ,
/*Reverse the element order (x,y,z,w) -> (w,z,y,x)*/
74 Swap = XFromW | YFromZ | ZFromY | WFromX,
79 Unary - (implemented as mulps [-1,-1,-1,-1])
80 Abs (implemented as pand [7fffffff,...] )
82 Mask extraction function
85 Single float constructor (expand it to the 4 positions)
86 Replace Shuffle with less bug-prone methods
/*
Vector of four float components. The struct is declared with sequential layout
and an explicit total size of 16 bytes.
*/
89 [StructLayout(LayoutKind.Sequential, Pack = 0, Size = 16)]
90 public struct Vector4f
/*Plain accessors: each property returns or overwrites the same-named lower-case field, with no validation.*/
97 public float X { get { return x; } set { x = value; } }
98 public float Y { get { return y; } set { y = value; } }
99 public float Z { get { return z; } set { z = value; } }
100 public float W { get { return w; } set { w = value; } }
/*
Indexed access to the components: the address of x is taken and offset by
index, so index 0 addresses x itself.
NOTE(review): indices 1..3 rely on y, z and w following x in memory, which the
sequential struct layout suggests - confirm against the field declarations.
Throws ArgumentOutOfRangeException when index is outside 0..3.
The IndexerName attribute exposes the indexer as "Component" to languages
without indexer syntax.
*/
102 [System.Runtime.CompilerServices.IndexerName ("Component")]
103 public unsafe float this [int index]
/*Read path. OR-ing with 3 yields exactly 3 only when no bit above the low two is set, so one test rejects both negative values (sign bit set) and values above 3.*/
106 if ((index | 0x3) != 0x3) //index < 0 || index > 3
107 throw new ArgumentOutOfRangeException ("index");
/*fixed provides a float pointer to x for the duration of the access.*/
108 fixed (float *v = &x) {
109 return * (v + index);
/*Write path: same range check, then value is stored at the same offset from x.*/
113 if ( (index | 0x3) != 0x3) //index < 0 || index > 3
114 throw new ArgumentOutOfRangeException ("index");
115 fixed (float *v = &x) {
116 * (v + index) = value;
/*NOTE(review): only the signature is visible; presumably each argument is stored in the same-named component - confirm.*/
121 public Vector4f (float x, float y, float z, float w)
/*
Bitwise AND operator on two vectors.
NOTE(review): only the default-initialised result local is visible; given the
unsafe modifier the operands are presumably combined on their raw bits,
component by component - confirm.
*/
129 [Acceleration (AccelMode.SSE1)]
130 public static unsafe Vector4f operator & (Vector4f v1, Vector4f v2)
132 Vector4f res = new Vector4f ();
/*
Bitwise OR operator on two vectors.
NOTE(review): only the default-initialised result local is visible; given the
unsafe modifier the operands are presumably combined on their raw bits,
component by component - confirm.
*/
143 [Acceleration (AccelMode.SSE1)]
144 public static unsafe Vector4f operator | (Vector4f v1, Vector4f v2)
146 Vector4f res = new Vector4f ();
/*
Bitwise XOR operator on two vectors.
NOTE(review): only the default-initialised result local is visible; given the
unsafe modifier the operands are presumably combined on their raw bits,
component by component - confirm.
*/
157 [Acceleration (AccelMode.SSE1)]
158 public static unsafe Vector4f operator ^ (Vector4f v1, Vector4f v2)
160 Vector4f res = new Vector4f ();
/*Component-wise sum: each result component is v1's component plus the matching component of v2.*/
171 [Acceleration (AccelMode.SSE1)]
172 public static Vector4f operator + (Vector4f v1, Vector4f v2)
174 return new Vector4f (v1.x + v2.x, v1.y + v2.y, v1.z + v2.z, v1.w + v2.w);
/*Component-wise difference: each result component is v1's component minus the matching component of v2.*/
177 [Acceleration (AccelMode.SSE1)]
178 public static Vector4f operator - (Vector4f v1, Vector4f v2)
180 return new Vector4f (v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.w - v2.w);
/*Component-wise product: each result component is v1's component times the matching component of v2 (not a dot or cross product).*/
183 [Acceleration (AccelMode.SSE1)]
184 public static Vector4f operator * (Vector4f v1, Vector4f v2)
186 return new Vector4f (v1.x * v2.x, v1.y * v2.y, v1.z * v2.z, v1.w * v2.w);
/*Component-wise quotient: each result component is v1's component divided by the matching component of v2. No check is made for zero divisors.*/
189 [Acceleration (AccelMode.SSE1)]
190 public static Vector4f operator / (Vector4f v1, Vector4f v2)
192 return new Vector4f (v1.x / v2.x, v1.y / v2.y, v1.z / v2.z, v1.w / v2.w);
/*
NOTE(review): only the default-initialised result local is visible. By name
this presumably combines the raw bits of one operand with the complement of
the other; which operand is complemented cannot be told from here - confirm.
*/
195 [Acceleration (AccelMode.SSE1)]
196 public static unsafe Vector4f AndNot (Vector4f v1, Vector4f v2)
198 Vector4f res = new Vector4f ();
/*Component-wise square root. Each component is passed to System.Math.Sqrt (which works in double) and the result is narrowed back to float.*/
209 [Acceleration (AccelMode.SSE1)]
210 public static Vector4f Sqrt (Vector4f v1)
212 return new Vector4f ((float)System.Math.Sqrt ((float)v1.x),
213 (float)System.Math.Sqrt ((float)v1.y),
214 (float)System.Math.Sqrt ((float)v1.z),
215 (float)System.Math.Sqrt ((float)v1.w));
/*Component-wise reciprocal square root. 1.0 / Math.Sqrt (component) is evaluated in double and only the final quotient is narrowed to float.*/
218 [Acceleration (AccelMode.SSE1)]
219 public static Vector4f InvSqrt (Vector4f v1)
221 return new Vector4f ((float)(1.0 / System.Math.Sqrt ((float)v1.x)),
222 (float)(1.0 / System.Math.Sqrt ((float)v1.y)),
223 (float)(1.0 / System.Math.Sqrt ((float)v1.z)),
224 (float)(1.0 / System.Math.Sqrt ((float)v1.w)));
/*Component-wise reciprocal: each result component is 1.0f divided by the matching component of v1, using float division.*/
227 [Acceleration (AccelMode.SSE1)]
228 public static Vector4f Reciprocal (Vector4f v1)
230 return new Vector4f (1.0f / v1.x, 1.0f / v1.y, 1.0f / v1.z, 1.0f / v1.w);
/*Component-wise maximum: each result component is System.Math.Max of the matching components of v1 and v2.*/
233 [Acceleration (AccelMode.SSE1)]
234 public static Vector4f Max (Vector4f v1, Vector4f v2)
236 return new Vector4f (System.Math.Max (v1.x, v2.x),
237 System.Math.Max (v1.y, v2.y),
238 System.Math.Max (v1.z, v2.z),
239 System.Math.Max (v1.w, v2.w));
/*Component-wise minimum: each result component is System.Math.Min of the matching components of v1 and v2.*/
242 [Acceleration (AccelMode.SSE1)]
243 public static Vector4f Min (Vector4f v1, Vector4f v2)
245 return new Vector4f (System.Math.Min (v1.x, v2.x),
246 System.Math.Min (v1.y, v2.y),
247 System.Math.Min (v1.z, v2.z),
248 System.Math.Min (v1.w, v2.w));
/*Adds adjacent pairs within each operand: the result is (v1.x+v1.y, v1.z+v1.w, v2.x+v2.y, v2.z+v2.w), so v1 fills the first two components and v2 the last two.*/
251 [Acceleration (AccelMode.SSE3)]
252 public static Vector4f HorizontalAdd (Vector4f v1, Vector4f v2)
254 return new Vector4f (v1.x + v1.y, v1.z + v1.w, v2.x + v2.y, v2.z + v2.w);
/*Alternating subtract/add: X and Z hold v1 minus v2, Y and W hold v1 plus v2.*/
257 [Acceleration (AccelMode.SSE3)]
258 public static Vector4f AddSub (Vector4f v1, Vector4f v2)
260 return new Vector4f (v1.x - v2.x, v1.y + v2.y, v1.z - v2.z, v1.w + v2.w);
/*Subtracts adjacent pairs within each operand: the result is (v1.x-v1.y, v1.z-v1.w, v2.x-v2.y, v2.z-v2.w), so v1 fills the first two components and v2 the last two.*/
263 [Acceleration (AccelMode.SSE3)]
264 public static Vector4f HorizontalSub (Vector4f v1, Vector4f v2)
266 return new Vector4f (v1.x - v1.y, v1.z - v1.w, v2.x - v2.y, v2.z - v2.w);
/*Interleaves the last two components of both operands: the result is (v1.z, v2.z, v1.w, v2.w).*/
269 [Acceleration (AccelMode.SSE1)]
270 public static Vector4f InterleaveHigh (Vector4f v1, Vector4f v2)
272 return new Vector4f (v1.z, v2.z, v1.w, v2.w);
/*Interleaves the first two components of both operands: the result is (v1.x, v2.x, v1.y, v2.y).*/
275 [Acceleration (AccelMode.SSE1)]
276 public static Vector4f InterleaveLow (Vector4f v1, Vector4f v2)
278 return new Vector4f (v1.x, v2.x, v1.y, v2.y);
/*
Component-wise equality test. For x, y, z and w in turn, -1 is stored through
the pointer c when the components of v1 and v2 are equal and 0 otherwise.
Because == is false whenever an operand is NaN, a NaN component yields 0.
NOTE(review): c presumably views res as four ints, making -1 an all-bits-set
mask - confirm against its declaration.
*/
282 [Acceleration (AccelMode.SSE1)]
283 public unsafe static Vector4f CompareEqual (Vector4f v1, Vector4f v2)
285 Vector4f res = new Vector4f ();
287 *c++ = v1.x == v2.x ? -1 : 0;
288 *c++ = v1.y == v2.y ? -1 : 0;
289 *c++ = v1.z == v2.z ? -1 : 0;
290 *c = v1.w == v2.w ? -1 : 0;
/*
Component-wise "less than" test. For x, y, z and w in turn, -1 is stored
through the pointer c when v1's component is below v2's and 0 otherwise.
A NaN operand makes < false, so that component yields 0.
NOTE(review): c presumably views res as four ints - confirm against its declaration.
*/
294 [Acceleration (AccelMode.SSE1)]
295 public unsafe static Vector4f CompareLessThan (Vector4f v1, Vector4f v2)
297 Vector4f res = new Vector4f ();
299 *c++ = v1.x < v2.x ? -1 : 0;
300 *c++ = v1.y < v2.y ? -1 : 0;
301 *c++ = v1.z < v2.z ? -1 : 0;
302 *c = v1.w < v2.w ? -1 : 0;
/*
Component-wise "less than or equal" test. For x, y, z and w in turn, -1 is
stored through the pointer c when v1's component is at most v2's and 0 otherwise.
A NaN operand makes <= false, so that component yields 0.
NOTE(review): c presumably views res as four ints - confirm against its declaration.
*/
307 [Acceleration (AccelMode.SSE1)]
308 public unsafe static Vector4f CompareLessEqual (Vector4f v1, Vector4f v2)
310 Vector4f res = new Vector4f ();
312 *c++ = v1.x <= v2.x ? -1 : 0;
313 *c++ = v1.y <= v2.y ? -1 : 0;
314 *c++ = v1.z <= v2.z ? -1 : 0;
315 *c = v1.w <= v2.w ? -1 : 0;
318 /*Same as float.IsNaN (a) || float.IsNaN (b): a component yields -1 when either operand's component is NaN and 0 otherwise. */
319 [Acceleration (AccelMode.SSE1)]
320 public unsafe static Vector4f CompareUnordered (Vector4f v1, Vector4f v2)
322 Vector4f res = new Vector4f ();
/*One store per component through the pointer c, in x, y, z, w order.*/
324 *c++ = float.IsNaN (v1.x) || float.IsNaN (v2.x) ? -1 : 0;
325 *c++ = float.IsNaN (v1.y) || float.IsNaN (v2.y) ? -1 : 0;
326 *c++ = float.IsNaN (v1.z) || float.IsNaN (v2.z) ? -1 : 0;
327 *c = float.IsNaN (v1.w) || float.IsNaN (v2.w) ? -1 : 0;
/*
Component-wise inequality test. For x, y, z and w in turn, -1 is stored through
the pointer c when the components of v1 and v2 differ and 0 otherwise.
Because != is true whenever an operand is NaN, a NaN component yields -1.
NOTE(review): c presumably views res as four ints - confirm against its declaration.
*/
331 [Acceleration (AccelMode.SSE1)]
332 public unsafe static Vector4f CompareNotEqual (Vector4f v1, Vector4f v2)
334 Vector4f res = new Vector4f ();
336 *c++ = v1.x != v2.x ? -1 : 0;
337 *c++ = v1.y != v2.y ? -1 : 0;
338 *c++ = v1.z != v2.z ? -1 : 0;
339 *c = v1.w != v2.w ? -1 : 0;
343 /*Same as !(a < b): a component yields 0 when v1's component is below v2's and -1 otherwise. */
344 [Acceleration (AccelMode.SSE1)]
345 public unsafe static Vector4f CompareNotLessThan (Vector4f v1, Vector4f v2)
347 Vector4f res = new Vector4f ();
/*Written as "< ? 0 : -1" rather than ">=" so that a NaN operand, for which < is false, yields -1.*/
349 *c++ = v1.x < v2.x ? 0 : -1;
350 *c++ = v1.y < v2.y ? 0 : -1;
351 *c++ = v1.z < v2.z ? 0 : -1;
352 *c = v1.w < v2.w ? 0 : -1;
356 /*Same as !(a <= b): a component yields 0 when v1's component is at most v2's and -1 otherwise. */
357 [Acceleration (AccelMode.SSE1)]
358 public unsafe static Vector4f CompareNotLessEqual (Vector4f v1, Vector4f v2)
360 Vector4f res = new Vector4f ();
/*Written as "<= ? 0 : -1" rather than ">" so that a NaN operand, for which <= is false, yields -1.*/
362 *c++ = v1.x <= v2.x ? 0 : -1;
363 *c++ = v1.y <= v2.y ? 0 : -1;
364 *c++ = v1.z <= v2.z ? 0 : -1;
365 *c = v1.w <= v2.w ? 0 : -1;
369 /*Same as !float.IsNaN (a) && !float.IsNaN (b): a component yields -1 when neither operand's component is NaN and 0 otherwise. */
370 [Acceleration (AccelMode.SSE1)]
371 public unsafe static Vector4f CompareOrdered (Vector4f v1, Vector4f v2)
373 Vector4f res = new Vector4f ();
/*One store per component through the pointer c, in x, y, z, w order.*/
375 *c++ = !float.IsNaN (v1.x) && !float.IsNaN (v2.x) ? -1 : 0;
376 *c++ = !float.IsNaN (v1.y) && !float.IsNaN (v2.y) ? -1 : 0;
377 *c++ = !float.IsNaN (v1.z) && !float.IsNaN (v2.z) ? -1 : 0;
378 *c = !float.IsNaN (v1.w) && !float.IsNaN (v2.w) ? -1 : 0;
/*Duplicates the first component of each pair: (x,y,z,w) -> (x,x,z,z).*/
381 [Acceleration (AccelMode.SSE3)]
382 public static Vector4f DuplicateLow (Vector4f v1)
384 return new Vector4f (v1.x, v1.x, v1.z, v1.z);
/*Duplicates the second component of each pair: (x,y,z,w) -> (y,y,w,w).*/
387 [Acceleration (AccelMode.SSE3)]
388 public static Vector4f DuplicateHigh (Vector4f v1)
390 return new Vector4f (v1.y, v1.y, v1.w, v1.w);
394 The sel argument must be a valid combination of ShuffleSel flags.
/*
Builds a new vector whose components are picked from v1 by 2-bit indices
packed in idx: bits 0-1 choose the source for X, bits 2-3 for Y, bits 4-5 for
Z and bits 6-7 for W. Each index is masked with 0x3, so the pointer offset
always stays within the four floats of v1.
NOTE(review): idx is presumably the integer value of sel - confirm against its
declaration.
*/
396 [Acceleration (AccelMode.SSE2)]
397 public static unsafe Vector4f Shuffle (Vector4f v1, ShuffleSel sel)
/*v1 is a by-value copy, so its address can be taken and indexed as a float array.*/
399 float *ptr = (float*)&v1;
401 return new Vector4f (*(ptr + ((idx >> 0) & 0x3)),*(ptr + ((idx >> 2) & 0x3)),*(ptr + ((idx >> 4) & 0x3)),*(ptr + ((idx >> 6) & 0x3)));
/*
Explicit conversion to Vector2d. The address of the by-value argument v is
recast to a Vector2d pointer, so the same bytes are viewed as another type
rather than converted numerically.
NOTE(review): the return statement is not visible; presumably the value p
points to is returned - confirm.
*/
404 [Acceleration (AccelMode.SSE1)]
405 public static unsafe explicit operator Vector2d (Vector4f v)
407 Vector2d* p = (Vector2d*)&v;
/*
Explicit conversion to Vector2l. The address of the by-value argument v is
recast to a Vector2l pointer, so the same bytes are viewed as another type
rather than converted numerically.
NOTE(review): the return statement is not visible; presumably the value p
points to is returned - confirm.
*/
411 [Acceleration (AccelMode.SSE1)]
412 public static unsafe explicit operator Vector2l (Vector4f v)
414 Vector2l* p = (Vector2l*)&v;
/*
Explicit conversion to Vector2ul (marked non-CLS-compliant). The address of the
by-value argument v is recast to a Vector2ul pointer, so the same bytes are
viewed as another type rather than converted numerically.
NOTE(review): the return statement is not visible; presumably the value p
points to is returned - confirm.
*/
418 [Acceleration (AccelMode.SSE1)]
419 [CLSCompliant(false)]
420 public static unsafe explicit operator Vector2ul (Vector4f v)
422 Vector2ul* p = (Vector2ul*)&v;
/*
Explicit conversion to Vector4i. The address of the by-value argument v is
recast to a Vector4i pointer, so the same bytes are viewed as another type
rather than converted numerically (floats are not rounded to integers here).
NOTE(review): the return statement is not visible; presumably the value p
points to is returned - confirm.
*/
426 [Acceleration (AccelMode.SSE1)]
427 public static unsafe explicit operator Vector4i (Vector4f v)
429 Vector4i* p = (Vector4i*)&v;
/*
Explicit conversion to Vector4ui (marked non-CLS-compliant). The address of the
by-value argument v is recast to a Vector4ui pointer, so the same bytes are
viewed as another type rather than converted numerically.
NOTE(review): the return statement is not visible; presumably the value p
points to is returned - confirm.
*/
433 [Acceleration (AccelMode.SSE1)]
434 [CLSCompliant(false)]
435 public static unsafe explicit operator Vector4ui (Vector4f v)
437 Vector4ui* p = (Vector4ui*)&v;
/*
Explicit conversion to Vector8s. The address of the by-value argument v is
recast to a Vector8s pointer, so the same bytes are viewed as another type
rather than converted numerically.
NOTE(review): the return statement is not visible; presumably the value p
points to is returned - confirm.
*/
441 [Acceleration (AccelMode.SSE1)]
442 public static unsafe explicit operator Vector8s (Vector4f v)
444 Vector8s* p = (Vector8s*)&v;
/*
Explicit conversion to Vector8us (marked non-CLS-compliant). The address of the
by-value argument v is recast to a Vector8us pointer, so the same bytes are
viewed as another type rather than converted numerically.
NOTE(review): the return statement is not visible; presumably the value p
points to is returned - confirm.
*/
448 [Acceleration (AccelMode.SSE1)]
449 [CLSCompliant(false)]
450 public static unsafe explicit operator Vector8us (Vector4f v)
452 Vector8us* p = (Vector8us*)&v;
/*
Explicit conversion to Vector16sb (marked non-CLS-compliant). The address of
the by-value argument v is recast to a Vector16sb pointer, so the same bytes
are viewed as another type rather than converted numerically.
NOTE(review): the return statement is not visible; presumably the value p
points to is returned - confirm.
*/
456 [Acceleration (AccelMode.SSE1)]
457 [CLSCompliant(false)]
458 public static unsafe explicit operator Vector16sb (Vector4f v)
460 Vector16sb* p = (Vector16sb*)&v;
/*
Explicit conversion to Vector16b. The address of the by-value argument v is
recast to a Vector16b pointer, so the same bytes are viewed as another type
rather than converted numerically.
NOTE(review): the return statement is not visible; presumably the value p
points to is returned - confirm.
*/
464 [Acceleration (AccelMode.SSE1)]
465 public static unsafe explicit operator Vector16b (Vector4f v)
467 Vector16b* p = (Vector16b*)&v;
/*NOTE(review): only the signature is visible. By name this presumably returns the vector stored at v, with the caller expected to supply suitably aligned storage - confirm.*/
471 [Acceleration (AccelMode.SSE1)]
472 public static Vector4f LoadAligned (ref Vector4f v)
/*NOTE(review): only the signature is visible. By name this presumably writes val into res, with the caller expected to supply suitably aligned storage - confirm.*/
477 [Acceleration (AccelMode.SSE1)]
478 public static void StoreAligned (ref Vector4f res, Vector4f val)
/*Pointer overload of LoadAligned (marked non-CLS-compliant). NOTE(review): only the signature is visible; presumably returns the vector v points to, with v expected to be suitably aligned - confirm.*/
483 [CLSCompliant(false)]
484 [Acceleration (AccelMode.SSE1)]
485 public static unsafe Vector4f LoadAligned (Vector4f *v)
/*Pointer overload of StoreAligned (marked non-CLS-compliant). NOTE(review): only the signature is visible; presumably writes val to the location res points to, with res expected to be suitably aligned - confirm.*/
490 [CLSCompliant(false)]
491 [Acceleration (AccelMode.SSE1)]
492 public static unsafe void StoreAligned (Vector4f *res, Vector4f val)
/*NOTE(review): only the signature is visible. By name this is presumably a hint to prefetch the memory holding res into every cache level; it returns nothing - confirm.*/
497 [Acceleration (AccelMode.SSE1)]
498 [CLSCompliant(false)]
499 public static void PrefetchTemporalAllCacheLevels (ref Vector4f res)
/*NOTE(review): only the signature is visible. By name this is presumably a hint to prefetch the memory holding res, targeting the first-level cache; it returns nothing - confirm.*/
503 [Acceleration (AccelMode.SSE1)]
504 [CLSCompliant(false)]
505 public static void PrefetchTemporal1stLevelCache (ref Vector4f res)
/*NOTE(review): only the signature is visible. By name this is presumably a hint to prefetch the memory holding res, targeting the second-level cache; it returns nothing - confirm.*/
509 [Acceleration (AccelMode.SSE1)]
510 [CLSCompliant(false)]
511 public static void PrefetchTemporal2ndLevelCache (ref Vector4f res)
/*NOTE(review): only the signature is visible. By name this is presumably a non-temporal prefetch hint for the memory holding res; it returns nothing - confirm.*/
515 [Acceleration (AccelMode.SSE1)]
516 [CLSCompliant(false)]
517 public static void PrefetchNonTemporal (ref Vector4f res)
/*Pointer overload. NOTE(review): only the signature is visible; by name this is presumably a hint to prefetch the memory res points to into every cache level - confirm.*/
521 [Acceleration (AccelMode.SSE1)]
522 [CLSCompliant(false)]
523 public static unsafe void PrefetchTemporalAllCacheLevels (Vector4f *res)
/*Pointer overload. NOTE(review): only the signature is visible; by name this is presumably a hint to prefetch the memory res points to, targeting the first-level cache - confirm.*/
527 [Acceleration (AccelMode.SSE1)]
528 [CLSCompliant(false)]
529 public static unsafe void PrefetchTemporal1stLevelCache (Vector4f *res)
/*Pointer overload. NOTE(review): only the signature is visible; by name this is presumably a hint to prefetch the memory res points to, targeting the second-level cache - confirm.*/
533 [Acceleration (AccelMode.SSE1)]
534 [CLSCompliant(false)]
535 public static unsafe void PrefetchTemporal2ndLevelCache (Vector4f *res)
/*Pointer overload. NOTE(review): only the signature is visible; by name this is presumably a non-temporal prefetch hint for the memory res points to - confirm.*/
539 [Acceleration (AccelMode.SSE1)]
540 [CLSCompliant(false)]
541 public static unsafe void PrefetchNonTemporal (Vector4f *res)