4 // Rodrigo Kumpera (rkumpera@novell.com)
6 // (C) 2008 Novell, Inc. (http://www.novell.com)
8 // Permission is hereby granted, free of charge, to any person obtaining
9 // a copy of this software and associated documentation files (the
10 // "Software"), to deal in the Software without restriction, including
11 // without limitation the rights to use, copy, modify, merge, publish,
12 // distribute, sublicense, and/or sell copies of the Software, and to
13 // permit persons to whom the Software is furnished to do so, subject to
14 // the following conditions:
16 // The above copyright notice and this permission notice shall be
17 // included in all copies or substantial portions of the Software.
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 using System.Runtime.InteropServices;
\r
/*
 * Selector values accepted by Vector4f.Shuffle. Every named combination
 * below ORs together four per-component flags. As the (x,y,z,w) -> (...)
 * comments show, a flag named AFromB means "component A of the result is
 * taken from component B of the source".
 * NOTE(review): the individual XFromX..WFromW flags are declared on lines
 * that are not visible in this excerpt.
 */
33 public enum ShuffleSel
\r
55 /*Expand a single element into all elements*/
56 ExpandX = XFromX | YFromX | ZFromX | WFromX,
57 ExpandY = XFromY | YFromY | ZFromY | WFromY,
58 ExpandZ = XFromZ | YFromZ | ZFromZ | WFromZ,
59 ExpandW = XFromW | YFromW | ZFromW | WFromW,
61 /*Expand a pair of elements: (x,y,z,w) -> (x,x,y,y) for XY, (z,z,w,w) for ZW*/
62 ExpandXY = XFromX | YFromX | ZFromY | WFromY,
63 ExpandZW = XFromZ | YFromZ | ZFromW | WFromW,
65 /*Expand interleaving elements: (x,y,z,w) -> (x,y,x,y) for XY, (z,w,z,w) for ZW*/
66 ExpandInterleavedXY = XFromX | YFromY | ZFromX | WFromY,
67 ExpandInterleavedZW = XFromZ | YFromW | ZFromZ | WFromW,
/*Rotate elements: RotateRight is (x,y,z,w) -> (y,z,w,x), RotateLeft is (x,y,z,w) -> (w,x,y,z)*/
70 RotateRight = XFromY | YFromZ | ZFromW | WFromX,
71 RotateLeft = XFromW | YFromX | ZFromY | WFromZ,
/*Reverse the element order: (x,y,z,w) -> (w,z,y,x)*/
74 Swap = XFromW | YFromZ | ZFromY | WFromX,
79 Unary - (implemented as mulps [-1,-1,-1,-1])
80 Abs (implemented as pand [7fffffff,...] )
82 Mask extraction function
85 Single float constructor (expand it to the 4 positions)
86 Replace Shuffle with less bug-prone methods
89 [StructLayout(LayoutKind.Sequential, Pack = 0, Size = 16)]
\r
90 public struct Vector4f
\r
/* Gets or sets the first component; reads and writes the backing field x. */
public float X {
	get { return x; }
	set { x = value; }
}
\r
/* Gets or sets the second component; reads and writes the backing field y. */
public float Y {
	get { return y; }
	set { y = value; }
}
\r
/* Gets or sets the third component; reads and writes the backing field z. */
public float Z {
	get { return z; }
	set { z = value; }
}
\r
/* Gets or sets the fourth component; reads and writes the backing field w. */
public float W {
	get { return w; }
	set { w = value; }
}
\r
/*
 * Constructor taking the four components as separate floats, in the order
 * x, y, z, w.
 * NOTE(review): the constructor body is not visible in this excerpt;
 * presumably each parameter is stored in the field of the same name - confirm.
 */
102 public Vector4f (float x, float y, float z, float w)
\r
/*
 * Bitwise AND of two vectors, computed on their raw bit patterns: v1 and v2
 * are reinterpreted through int pointers and ANDed one int at a time.
 * NOTE(review): the declaration of c, the handling of the fourth element and
 * the return statement are not visible in this excerpt; presumably c points
 * into res and res is returned - confirm.
 */
110 [Acceleration (AccelMode.SSE1)]
111 public static unsafe Vector4f operator & (Vector4f v1, Vector4f v2)
\r
113 Vector4f res = new Vector4f ();
/* View both operands as sequences of ints so the integer & operator can be used. */
114 int *a = (int*)&v1;
\r
115 int *b = (int*)&v2;
\r
/* Each statement combines one int from each operand and advances all three pointers. */
117 *c++ = *a++ & *b++;
\r
118 *c++ = *a++ & *b++;
\r
119 *c++ = *a++ & *b++;
\r
/*
 * Bitwise OR of two vectors, computed on their raw bit patterns: v1 and v2
 * are reinterpreted through int pointers and ORed one int at a time.
 * NOTE(review): the declaration of c, the handling of the fourth element and
 * the return statement are not visible in this excerpt; presumably c points
 * into res and res is returned - confirm.
 */
124 [Acceleration (AccelMode.SSE1)]
125 public static unsafe Vector4f operator | (Vector4f v1, Vector4f v2)
\r
127 Vector4f res = new Vector4f ();
/* View both operands as sequences of ints so the integer | operator can be used. */
128 int *a = (int*)&v1;
\r
129 int *b = (int*)&v2;
\r
/* Each statement combines one int from each operand and advances all three pointers. */
131 *c++ = *a++ | *b++;
\r
132 *c++ = *a++ | *b++;
\r
133 *c++ = *a++ | *b++;
\r
/*
 * Bitwise XOR of two vectors, computed on their raw bit patterns: v1 and v2
 * are reinterpreted through int pointers and XORed one int at a time.
 * NOTE(review): the declaration of c, the handling of the fourth element and
 * the return statement are not visible in this excerpt; presumably c points
 * into res and res is returned - confirm.
 */
138 [Acceleration (AccelMode.SSE1)]
139 public static unsafe Vector4f operator ^ (Vector4f v1, Vector4f v2)
\r
141 Vector4f res = new Vector4f ();
/* View both operands as sequences of ints so the integer ^ operator can be used. */
142 int *a = (int*)&v1;
\r
143 int *b = (int*)&v2;
\r
/* Each statement combines one int from each operand and advances all three pointers. */
145 *c++ = *a++ ^ *b++;
\r
146 *c++ = *a++ ^ *b++;
\r
147 *c++ = *a++ ^ *b++;
\r
/*
 * Component-wise addition: each component of the result is the sum of the
 * matching components of v1 and v2.
 */
[Acceleration (AccelMode.SSE1)]
public static Vector4f operator + (Vector4f v1, Vector4f v2)
{
	return new Vector4f (v1.x + v2.x, v1.y + v2.y, v1.z + v2.z, v1.w + v2.w);
}
\r
/*
 * Component-wise subtraction: each component of the result is the matching
 * component of v1 minus that of v2.
 */
[Acceleration (AccelMode.SSE1)]
public static Vector4f operator - (Vector4f v1, Vector4f v2)
{
	return new Vector4f (v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.w - v2.w);
}
\r
/*
 * Component-wise multiplication: each component of the result is the product
 * of the matching components of v1 and v2 (this is not a dot product).
 */
[Acceleration (AccelMode.SSE1)]
public static Vector4f operator * (Vector4f v1, Vector4f v2)
{
	return new Vector4f (v1.x * v2.x, v1.y * v2.y, v1.z * v2.z, v1.w * v2.w);
}
\r
/*
 * Component-wise division: each component of the result is the matching
 * component of v1 divided by that of v2. This is float division, so a zero
 * divisor produces an infinity or NaN rather than an exception.
 */
[Acceleration (AccelMode.SSE1)]
public static Vector4f operator / (Vector4f v1, Vector4f v2)
{
	return new Vector4f (v1.x / v2.x, v1.y / v2.y, v1.z / v2.z, v1.w / v2.w);
}
\r
/*
 * Bitwise "and not" on the raw bit patterns of two vectors: for each int,
 * the bits of v1 are inverted and then ANDed with the bits of v2, i.e.
 * (~v1) & v2. Note that it is the first operand that gets complemented.
 * NOTE(review): the declaration of c, the handling of the fourth element and
 * the return statement are not visible in this excerpt; presumably c points
 * into res and res is returned - confirm.
 */
176 [Acceleration (AccelMode.SSE1)]
177 public static unsafe Vector4f AndNot (Vector4f v1, Vector4f v2)
\r
179 Vector4f res = new Vector4f ();
/* View both operands as sequences of ints so the integer ~ and & operators can be used. */
180 int *a = (int*)&v1;
\r
181 int *b = (int*)&v2;
\r
/* ~ binds tighter than &, so each statement stores (~*a) & *b and advances all three pointers. */
183 *c++ = ~*a++ & *b++;
\r
184 *c++ = ~*a++ & *b++;
\r
185 *c++ = ~*a++ & *b++;
\r
/*
 * Component-wise square root. Each component is widened to double for
 * System.Math.Sqrt and the result is narrowed back to float.
 */
[Acceleration (AccelMode.SSE1)]
public static Vector4f Sqrt (Vector4f v1)
{
	return new Vector4f ((float)System.Math.Sqrt (v1.x),
				(float)System.Math.Sqrt (v1.y),
				(float)System.Math.Sqrt (v1.z),
				(float)System.Math.Sqrt (v1.w));
}
\r
/*
 * Component-wise reciprocal square root, 1 / sqrt (component). The division
 * is carried out in double precision (1.0 is a double literal) and only the
 * final quotient is narrowed to float.
 */
[Acceleration (AccelMode.SSE1)]
public static Vector4f InvSqrt (Vector4f v1)
{
	return new Vector4f ((float)(1.0 / System.Math.Sqrt (v1.x)),
				(float)(1.0 / System.Math.Sqrt (v1.y)),
				(float)(1.0 / System.Math.Sqrt (v1.z)),
				(float)(1.0 / System.Math.Sqrt (v1.w)));
}
\r
/*
 * Component-wise reciprocal: each component of the result is 1.0f divided by
 * the matching component of v1, computed in single precision.
 */
[Acceleration (AccelMode.SSE1)]
public static Vector4f Reciprocal (Vector4f v1)
{
	return new Vector4f (1.0f / v1.x, 1.0f / v1.y, 1.0f / v1.z, 1.0f / v1.w);
}
\r
/*
 * Component-wise maximum: each component of the result is
 * System.Math.Max of the matching components of v1 and v2.
 */
[Acceleration (AccelMode.SSE1)]
public static Vector4f Max (Vector4f v1, Vector4f v2)
{
	return new Vector4f (System.Math.Max (v1.x, v2.x),
				System.Math.Max (v1.y, v2.y),
				System.Math.Max (v1.z, v2.z),
				System.Math.Max (v1.w, v2.w));
}
\r
/*
 * Component-wise minimum: each component of the result is
 * System.Math.Min of the matching components of v1 and v2.
 */
[Acceleration (AccelMode.SSE1)]
public static Vector4f Min (Vector4f v1, Vector4f v2)
{
	return new Vector4f (System.Math.Min (v1.x, v2.x),
				System.Math.Min (v1.y, v2.y),
				System.Math.Min (v1.z, v2.z),
				System.Math.Min (v1.w, v2.w));
}
\r
/*
 * Adds adjacent pairs of components. The first two components of the result
 * come from v1 (x + y, z + w) and the last two from v2 (x + y, z + w).
 */
[Acceleration (AccelMode.SSE3)]
public static Vector4f HorizontalAdd (Vector4f v1, Vector4f v2)
{
	return new Vector4f (v1.x + v1.y, v1.z + v1.w, v2.x + v2.y, v2.z + v2.w);
}
\r
/*
 * Alternating subtract/add: the first and third components are v1 - v2,
 * the second and fourth components are v1 + v2.
 */
[Acceleration (AccelMode.SSE3)]
public static Vector4f AddSub (Vector4f v1, Vector4f v2)
{
	return new Vector4f (v1.x - v2.x, v1.y + v2.y, v1.z - v2.z, v1.w + v2.w);
}
\r
/*
 * Subtracts adjacent pairs of components. The first two components of the
 * result come from v1 (x - y, z - w) and the last two from v2 (x - y, z - w).
 */
[Acceleration (AccelMode.SSE3)]
public static Vector4f HorizontalSub (Vector4f v1, Vector4f v2)
{
	return new Vector4f (v1.x - v1.y, v1.z - v1.w, v2.x - v2.y, v2.z - v2.w);
}
\r
/*
 * Interleaves the last two components of both vectors:
 * the result is (v1.z, v2.z, v1.w, v2.w).
 */
[Acceleration (AccelMode.SSE1)]
public static Vector4f InterleaveHigh (Vector4f v1, Vector4f v2)
{
	return new Vector4f (v1.z, v2.z, v1.w, v2.w);
}
\r
/*
 * Interleaves the first two components of both vectors:
 * the result is (v1.x, v2.x, v1.y, v2.y).
 */
[Acceleration (AccelMode.SSE1)]
public static Vector4f InterleaveLow (Vector4f v1, Vector4f v2)
{
	return new Vector4f (v1.x, v2.x, v1.y, v2.y);
}
\r
/*
 * Component-wise equality test. For each of the four components an int is
 * written through c: -1 (all bits set) when the components of v1 and v2
 * compare equal, 0 otherwise. A NaN component never compares equal.
 * NOTE(review): the declaration of c and the return statement are not
 * visible in this excerpt; presumably c points into res and res is
 * returned - confirm.
 */
263 [Acceleration (AccelMode.SSE1)]
264 public unsafe static Vector4f CompareEqual (Vector4f v1, Vector4f v2)
266 Vector4f res = new Vector4f ();
/* One mask int per component; c is advanced after each of the first three stores. */
268 *c++ = v1.x == v2.x ? -1 : 0;
\r
269 *c++ = v1.y == v2.y ? -1 : 0;
\r
270 *c++ = v1.z == v2.z ? -1 : 0;
\r
271 *c = v1.w == v2.w ? -1 : 0;
/*
 * Component-wise "less than" test. For each of the four components an int
 * is written through c: -1 (all bits set) when the component of v1 is less
 * than that of v2, 0 otherwise (including when either one is NaN).
 * NOTE(review): the declaration of c and the return statement are not
 * visible in this excerpt; presumably c points into res and res is
 * returned - confirm.
 */
275 [Acceleration (AccelMode.SSE1)]
276 public unsafe static Vector4f CompareLessThan (Vector4f v1, Vector4f v2)
278 Vector4f res = new Vector4f ();
/* One mask int per component; c is advanced after each of the first three stores. */
280 *c++ = v1.x < v2.x ? -1 : 0;
\r
281 *c++ = v1.y < v2.y ? -1 : 0;
\r
282 *c++ = v1.z < v2.z ? -1 : 0;
\r
283 *c = v1.w < v2.w ? -1 : 0;
/*
 * Component-wise "less than or equal" test. For each of the four components
 * an int is written through c: -1 (all bits set) when the component of v1 is
 * less than or equal to that of v2, 0 otherwise (including when either one
 * is NaN).
 * NOTE(review): the declaration of c and the return statement are not
 * visible in this excerpt; presumably c points into res and res is
 * returned - confirm.
 */
288 [Acceleration (AccelMode.SSE1)]
289 public unsafe static Vector4f CompareLessEqual (Vector4f v1, Vector4f v2)
291 Vector4f res = new Vector4f ();
/* One mask int per component; c is advanced after each of the first three stores. */
293 *c++ = v1.x <= v2.x ? -1 : 0;
\r
294 *c++ = v1.y <= v2.y ? -1 : 0;
\r
295 *c++ = v1.z <= v2.z ? -1 : 0;
\r
296 *c = v1.w <= v2.w ? -1 : 0;
/*
 * Component-wise "unordered" test. For each of the four components an int is
 * written through c: -1 (all bits set) when at least one of the two
 * components is NaN, 0 when both are ordinary numbers.
 * NOTE(review): the declaration of c and the return statement are not
 * visible in this excerpt; presumably c points into res and res is
 * returned - confirm.
 */
299 /*Same as float.IsNaN (a) || float.IsNaN (b). */
300 [Acceleration (AccelMode.SSE1)]
301 public unsafe static Vector4f CompareUnordered (Vector4f v1, Vector4f v2)
303 Vector4f res = new Vector4f ();
/* || binds tighter than ?:, so the whole NaN test selects between -1 and 0. */
305 *c++ = float.IsNaN (v1.x) || float.IsNaN (v2.x) ? -1 : 0;
\r
306 *c++ = float.IsNaN (v1.y) || float.IsNaN (v2.y) ? -1 : 0;
\r
307 *c++ = float.IsNaN (v1.z) || float.IsNaN (v2.z) ? -1 : 0;
\r
308 *c = float.IsNaN (v1.w) || float.IsNaN (v2.w) ? -1 : 0;
/*
 * Component-wise inequality test. For each of the four components an int is
 * written through c: -1 (all bits set) when the components of v1 and v2
 * differ, 0 when they compare equal. Because != is true whenever an operand
 * is NaN, a NaN component yields -1.
 * NOTE(review): the declaration of c and the return statement are not
 * visible in this excerpt; presumably c points into res and res is
 * returned - confirm.
 */
312 [Acceleration (AccelMode.SSE1)]
313 public unsafe static Vector4f CompareNotEqual (Vector4f v1, Vector4f v2)
315 Vector4f res = new Vector4f ();
/* One mask int per component; c is advanced after each of the first three stores. */
317 *c++ = v1.x != v2.x ? -1 : 0;
\r
318 *c++ = v1.y != v2.y ? -1 : 0;
\r
319 *c++ = v1.z != v2.z ? -1 : 0;
\r
320 *c = v1.w != v2.w ? -1 : 0;
/*
 * Component-wise negated "less than" test. For each of the four components
 * an int is written through c: 0 when the component of v1 is less than that
 * of v2, -1 (all bits set) otherwise.
 * This is deliberately written as the negation of < and not as >=: when
 * either operand is NaN the < test is false, so the result is -1.
 * NOTE(review): the declaration of c and the return statement are not
 * visible in this excerpt; presumably c points into res and res is
 * returned - confirm.
 */
324 /*Same as !(a < b). */
325 [Acceleration (AccelMode.SSE1)]
326 public unsafe static Vector4f CompareNotLessThan (Vector4f v1, Vector4f v2)
328 Vector4f res = new Vector4f ();
/* Note the swapped arms of the conditional: 0 when the test holds, -1 when it does not. */
330 *c++ = v1.x < v2.x ? 0 : -1;
\r
331 *c++ = v1.y < v2.y ? 0 : -1;
\r
332 *c++ = v1.z < v2.z ? 0 : -1;
\r
333 *c = v1.w < v2.w ? 0 : -1;
/*
 * Component-wise negated "less than or equal" test. For each of the four
 * components an int is written through c: 0 when the component of v1 is less
 * than or equal to that of v2, -1 (all bits set) otherwise.
 * This is deliberately written as the negation of <= and not as >: when
 * either operand is NaN the <= test is false, so the result is -1.
 * NOTE(review): the declaration of c and the return statement are not
 * visible in this excerpt; presumably c points into res and res is
 * returned - confirm.
 */
337 /*Same as !(a <= b). */
338 [Acceleration (AccelMode.SSE1)]
339 public unsafe static Vector4f CompareNotLessEqual (Vector4f v1, Vector4f v2)
341 Vector4f res = new Vector4f ();
/* Note the swapped arms of the conditional: 0 when the test holds, -1 when it does not. */
343 *c++ = v1.x <= v2.x ? 0 : -1;
\r
344 *c++ = v1.y <= v2.y ? 0 : -1;
\r
345 *c++ = v1.z <= v2.z ? 0 : -1;
\r
346 *c = v1.w <= v2.w ? 0 : -1;
/*
 * Component-wise "ordered" test. For each of the four components an int is
 * written through c: -1 (all bits set) when neither of the two components is
 * NaN, 0 when at least one of them is.
 * NOTE(review): the declaration of c and the return statement are not
 * visible in this excerpt; presumably c points into res and res is
 * returned - confirm.
 */
350 /*Same as !float.IsNaN (a) && !float.IsNaN (b). */
351 [Acceleration (AccelMode.SSE1)]
352 public unsafe static Vector4f CompareOrdered (Vector4f v1, Vector4f v2)
354 Vector4f res = new Vector4f ();
/* && binds tighter than ?:, so the whole NaN test selects between -1 and 0. */
356 *c++ = !float.IsNaN (v1.x) && !float.IsNaN (v2.x) ? -1 : 0;
\r
357 *c++ = !float.IsNaN (v1.y) && !float.IsNaN (v2.y) ? -1 : 0;
\r
358 *c++ = !float.IsNaN (v1.z) && !float.IsNaN (v2.z) ? -1 : 0;
\r
359 *c = !float.IsNaN (v1.w) && !float.IsNaN (v2.w) ? -1 : 0;
/*
 * Duplicates the first component of each pair:
 * the result is (v1.x, v1.x, v1.z, v1.z).
 */
[Acceleration (AccelMode.SSE3)]
public static Vector4f DuplicateLow (Vector4f v1)
{
	return new Vector4f (v1.x, v1.x, v1.z, v1.z);
}
/*
 * Duplicates the second component of each pair:
 * the result is (v1.y, v1.y, v1.w, v1.w).
 */
[Acceleration (AccelMode.SSE3)]
public static Vector4f DuplicateHigh (Vector4f v1)
{
	return new Vector4f (v1.y, v1.y, v1.w, v1.w);
}
375 The sel argument must be a valid combination of ShuffleSel flags.
/*
 * Builds a new vector whose components are picked from v1 according to sel.
 * The integer value of sel is read as four 2-bit indices: bits 0-1 select
 * the source of the first result component, bits 2-3 the second, bits 4-5
 * the third and bits 6-7 the fourth. An index of 0..3 addresses the
 * first..fourth float of v1.
 */
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector4f Shuffle (Vector4f v1, ShuffleSel sel)
{
	/* Address the by-value copy of v1 as a float array so components can be indexed. */
	float *ptr = (float*)&v1;
	int idx = (int)sel;

	/* The 0x3 mask keeps every index within the four floats of v1. */
	return new Vector4f (*(ptr + ((idx >> 0) & 0x3)),
				*(ptr + ((idx >> 2) & 0x3)),
				*(ptr + ((idx >> 4) & 0x3)),
				*(ptr + ((idx >> 6) & 0x3)));
}
\r
/*
 * Explicit conversion from Vector4f to Vector4ui. The visible statement
 * takes the address of the by-value argument v and casts it to a Vector4ui
 * pointer. Marked CLSCompliant(false).
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably it returns *p, i.e. the same bits viewed as a Vector4ui with
 * no numeric conversion of the components - confirm.
 */
385 [CLSCompliant(false)]
\r
386 [Acceleration (AccelMode.SSE1)]
387 public static unsafe explicit operator Vector4ui(Vector4f v)
\r
389 Vector4ui* p = (Vector4ui*)&v;
\r
/*
 * Explicit conversion from Vector4f to Vector8us. The visible statement
 * takes the address of the by-value argument v and casts it to a Vector8us
 * pointer. Marked CLSCompliant(false).
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably it returns *p, i.e. the same bits viewed as a Vector8us with
 * no numeric conversion of the components - confirm.
 */
393 [CLSCompliant(false)]
\r
394 [Acceleration (AccelMode.SSE1)]
395 public static unsafe explicit operator Vector8us(Vector4f v)
\r
397 Vector8us* p = (Vector8us*)&v;
\r
/*
 * Explicit conversion from Vector4f to Vector16b. The visible statement
 * takes the address of the by-value argument v and casts it to a Vector16b
 * pointer. Marked CLSCompliant(false).
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably it returns *p, i.e. the same bits viewed as a Vector16b with
 * no numeric conversion of the components - confirm.
 */
401 [CLSCompliant(false)]
\r
402 [Acceleration (AccelMode.SSE1)]
403 public static unsafe explicit operator Vector16b(Vector4f v)
\r
405 Vector16b* p = (Vector16b*)&v;
\r
/*
 * Takes a Vector4f by reference and returns a Vector4f by value.
 * NOTE(review): the method body is not visible in this excerpt; presumably
 * it returns the referenced value, and the name suggests the storage is
 * expected to be suitably aligned - confirm both.
 */
409 [Acceleration (AccelMode.SSE1)]
410 public static Vector4f LoadAligned (ref Vector4f v)
\r
/*
 * Takes a destination Vector4f by reference (res) and a value (val); returns
 * nothing.
 * NOTE(review): the method body is not visible in this excerpt; presumably
 * it assigns val to res, and the name suggests the destination is expected
 * to be suitably aligned - confirm both.
 */
415 [Acceleration (AccelMode.SSE1)]
416 public static void StoreAligned (ref Vector4f res, Vector4f val)
\r
/*
 * Pointer overload: takes a Vector4f pointer and returns a Vector4f by
 * value. Marked CLSCompliant(false).
 * NOTE(review): the method body is not visible in this excerpt; presumably
 * it returns *v, and the name suggests v is expected to be suitably
 * aligned - confirm both.
 */
421 [CLSCompliant(false)]
\r
422 [Acceleration (AccelMode.SSE1)]
423 public static unsafe Vector4f LoadAligned (Vector4f *v)
\r
/*
 * Pointer overload: takes a destination Vector4f pointer (res) and a value
 * (val); returns nothing. Marked CLSCompliant(false).
 * NOTE(review): the method body is not visible in this excerpt; presumably
 * it stores val through res, and the name suggests res is expected to be
 * suitably aligned - confirm both.
 */
428 [CLSCompliant(false)]
\r
429 [Acceleration (AccelMode.SSE1)]
430 public static unsafe void StoreAligned (Vector4f *res, Vector4f val)
\r