2008-11-12 Cedric Vivier <cedricv@neonux.com>
[mono.git] / mcs / class / Mono.Simd / Mono.Simd / Vector4f.cs
1 // Vector4f.cs
2 //
3 // Author:
4 //   Rodrigo Kumpera (rkumpera@novell.com)
5 //
6 // (C) 2008 Novell, Inc. (http://www.novell.com)
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining
9 // a copy of this software and associated documentation files (the
10 // "Software"), to deal in the Software without restriction, including
11 // without limitation the rights to use, copy, modify, merge, publish,
12 // distribute, sublicense, and/or sell copies of the Software, and to
13 // permit persons to whom the Software is furnished to do so, subject to
14 // the following conditions:
15 //
16 // The above copyright notice and this permission notice shall be
17 // included in all copies or substantial portions of the Software.
18 //
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 //
27
28 using System;
29 using System.Runtime.InteropServices;
30
namespace Mono.Simd
{
        /// <summary>
        /// Selector bits for <see cref="Vector4f.Shuffle"/>. Combine one X, one Y,
        /// one Z and one W member with the bitwise OR operator; each pair of bits
        /// names the source element copied into that position of the result.
        /// </summary>
        public enum ShuffleSel
        {
                /* Source of the result's X element (bits 0-1). */
                XFromX,
                XFromY,
                XFromZ,
                XFromW,

                /* Source of the result's Y element (bits 2-3). */
                YFromX = 0x00,
                YFromY = 0x04,
                YFromZ = 0x08,
                YFromW = 0x0C,

                /* Source of the result's Z element (bits 4-5). */
                ZFromX = 0x00,
                ZFromY = 0x10,
                ZFromZ = 0x20,
                ZFromW = 0x30,

                /* Source of the result's W element (bits 6-7). */
                WFromX = 0x00,
                WFromY = 0x40,
                WFromZ = 0x80,
                WFromW = 0xC0,

                /* Expand a single element into all elements. */
                ExpandX = XFromX | YFromX | ZFromX | WFromX,
                ExpandY = XFromY | YFromY | ZFromY | WFromY,
                ExpandZ = XFromZ | YFromZ | ZFromZ | WFromZ,
                ExpandW = XFromW | YFromW | ZFromW | WFromW,

                /* Expand a pair of elements: (x,y,z,w) -> (x,x,y,y) or (z,z,w,w). */
                ExpandXY = XFromX | YFromX | ZFromY | WFromY,
                ExpandZW = XFromZ | YFromZ | ZFromW | WFromW,

                /* Expand interleaving elements: (x,y,z,w) -> (x,y,x,y) or (z,w,z,w). */
                ExpandInterleavedXY = XFromX | YFromY | ZFromX | WFromY,
                ExpandInterleavedZW = XFromZ | YFromW | ZFromZ | WFromW,

                /* Rotate elements: RotateRight yields (y,z,w,x), RotateLeft yields (w,x,y,z). */
                RotateRight = XFromY | YFromZ | ZFromW | WFromX,
                RotateLeft = XFromW | YFromX | ZFromY | WFromZ,

                /* Reverse the element order: (x,y,z,w) -> (w,z,y,x). */
                Swap = XFromW | YFromZ | ZFromY | WFromX,
        }

/*
        TODO:
        Unary - (implemented as mulps [-1,-1,-1,-1])
        Abs (implemented as pand [7fffffff,...] )
        Comparison functions
        Mask extraction function
        Setters
        vector x float ops
        Single float constructor (expand it to the 4 positions)
                Replace Shuffle with less bug prone methods
*/

        /// <summary>
        /// Four single-precision floats (x, y, z, w) stored sequentially in 16 bytes.
        /// The method bodies in this file are plain managed implementations.
        /// NOTE(review): the Acceleration attribute is declared elsewhere; presumably
        /// the runtime may substitute SIMD instructions for these bodies - confirm.
        /// </summary>
        [StructLayout (LayoutKind.Sequential, Pack = 0, Size = 16)]
        public struct Vector4f
        {
                private float x;
                private float y;
                private float z;
                private float w;

                /// <summary>Gets or sets the first element.</summary>
                public float X { get { return x; } set { x = value; } }

                /// <summary>Gets or sets the second element.</summary>
                public float Y { get { return y; } set { y = value; } }

                /// <summary>Gets or sets the third element.</summary>
                public float Z { get { return z; } set { z = value; } }

                /// <summary>Gets or sets the fourth element.</summary>
                public float W { get { return w; } set { w = value; } }

                /// <summary>
                /// Gets or sets an element by position (0 = x, 1 = y, 2 = z, 3 = w).
                /// </summary>
                /// <param name="index">Element position, 0 to 3 inclusive.</param>
                /// <exception cref="ArgumentOutOfRangeException">
                /// <paramref name="index"/> is negative or greater than 3.
                /// </exception>
                [System.Runtime.CompilerServices.IndexerName ("Component")]
                public unsafe float this [int index]
                {
                        get {
                                // OR-ing with 3 leaves exactly 3 only when no bit above the low
                                // two is set, so this rejects index < 0 and index > 3 at once.
                                if ((index | 0x3) != 0x3)
                                        throw new ArgumentOutOfRangeException ("index");
                                // The fields are sequential, so element i lives at &x + i.
                                fixed (float *v = &x) {
                                        return *(v + index);
                                }
                        }
                        set {
                                // Same range check as the getter: index < 0 || index > 3.
                                if ((index | 0x3) != 0x3)
                                        throw new ArgumentOutOfRangeException ("index");
                                fixed (float *v = &x) {
                                        *(v + index) = value;
                                }
                        }
                }

                /// <summary>Creates a vector from its four elements.</summary>
                public Vector4f (float x, float y, float z, float w)
                {
                        this.x = x;
                        this.y = y;
                        this.z = z;
                        this.w = w;
                }

                /// <summary>Bitwise AND of the raw 32-bit patterns of each element pair.</summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe Vector4f operator & (Vector4f v1, Vector4f v2)
                {
                        Vector4f res = new Vector4f ();
                        // View the three vectors as four 32-bit integers each.
                        int *a = (int*)&v1;
                        int *b = (int*)&v2;
                        int *c = (int*)&res;
                        *c++ = *a++ & *b++;
                        *c++ = *a++ & *b++;
                        *c++ = *a++ & *b++;
                        *c = *a & *b;
                        return res;
                }

                /// <summary>Bitwise OR of the raw 32-bit patterns of each element pair.</summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe Vector4f operator | (Vector4f v1, Vector4f v2)
                {
                        Vector4f res = new Vector4f ();
                        int *a = (int*)&v1;
                        int *b = (int*)&v2;
                        int *c = (int*)&res;
                        *c++ = *a++ | *b++;
                        *c++ = *a++ | *b++;
                        *c++ = *a++ | *b++;
                        *c = *a | *b;
                        return res;
                }

                /// <summary>Bitwise XOR of the raw 32-bit patterns of each element pair.</summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe Vector4f operator ^ (Vector4f v1, Vector4f v2)
                {
                        Vector4f res = new Vector4f ();
                        int *a = (int*)&v1;
                        int *b = (int*)&v2;
                        int *c = (int*)&res;
                        *c++ = *a++ ^ *b++;
                        *c++ = *a++ ^ *b++;
                        *c++ = *a++ ^ *b++;
                        *c = *a ^ *b;
                        return res;
                }

                /// <summary>Element-wise addition.</summary>
                [Acceleration (AccelMode.SSE1)]
                public static Vector4f operator + (Vector4f v1, Vector4f v2)
                {
                        return new Vector4f (v1.x + v2.x, v1.y + v2.y, v1.z + v2.z, v1.w + v2.w);
                }

                /// <summary>Element-wise subtraction (v1 minus v2).</summary>
                [Acceleration (AccelMode.SSE1)]
                public static Vector4f operator - (Vector4f v1, Vector4f v2)
                {
                        return new Vector4f (v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.w - v2.w);
                }

                /// <summary>Element-wise multiplication.</summary>
                [Acceleration (AccelMode.SSE1)]
                public static Vector4f operator * (Vector4f v1, Vector4f v2)
                {
                        return new Vector4f (v1.x * v2.x, v1.y * v2.y, v1.z * v2.z, v1.w * v2.w);
                }

                /// <summary>Element-wise division (v1 divided by v2).</summary>
                [Acceleration (AccelMode.SSE1)]
                public static Vector4f operator / (Vector4f v1, Vector4f v2)
                {
                        return new Vector4f (v1.x / v2.x, v1.y / v2.y, v1.z / v2.z, v1.w / v2.w);
                }

                /// <summary>
                /// Bitwise (NOT v1) AND v2 on the raw 32-bit pattern of each element.
                /// Note that the complement is applied to the first argument.
                /// </summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe Vector4f AndNot (Vector4f v1, Vector4f v2)
                {
                        Vector4f res = new Vector4f ();
                        int *a = (int*)&v1;
                        int *b = (int*)&v2;
                        int *c = (int*)&res;
                        *c++ = ~*a++ & *b++;
                        *c++ = ~*a++ & *b++;
                        *c++ = ~*a++ & *b++;
                        *c = ~*a & *b;
                        return res;
                }

                /// <summary>Element-wise square root.</summary>
                [Acceleration (AccelMode.SSE1)]
                public static Vector4f Sqrt (Vector4f v1)
                {
                        return new Vector4f ((float)System.Math.Sqrt (v1.x),
                                             (float)System.Math.Sqrt (v1.y),
                                             (float)System.Math.Sqrt (v1.z),
                                             (float)System.Math.Sqrt (v1.w));
                }

                /// <summary>
                /// Element-wise reciprocal square root, computed in double precision and
                /// rounded to float.
                /// </summary>
                [Acceleration (AccelMode.SSE1)]
                public static Vector4f InvSqrt (Vector4f v1)
                {
                        return new Vector4f ((float)(1.0 / System.Math.Sqrt (v1.x)),
                                             (float)(1.0 / System.Math.Sqrt (v1.y)),
                                             (float)(1.0 / System.Math.Sqrt (v1.z)),
                                             (float)(1.0 / System.Math.Sqrt (v1.w)));
                }

                /// <summary>Element-wise reciprocal (1 divided by each element).</summary>
                [Acceleration (AccelMode.SSE1)]
                public static Vector4f Reciprocal (Vector4f v1)
                {
                        return new Vector4f (1.0f / v1.x, 1.0f / v1.y, 1.0f / v1.z, 1.0f / v1.w);
                }

                /// <summary>
                /// Element-wise maximum. When the comparison is unordered (either element
                /// is NaN) or both elements compare equal, the element from
                /// <paramref name="v2"/> is returned.
                /// </summary>
                [Acceleration (AccelMode.SSE1)]
                public static Vector4f Max (Vector4f v1, Vector4f v2)
                {
                        // An ordered "greater than" is false for NaN, so v2 wins in that case.
                        // This mirrors MAXPS, which returns its second operand for NaN inputs
                        // and for two zeros; System.Math.Max would propagate NaN instead.
                        // NOTE(review): assumes v1/v2 map to the instruction's first/second
                        // operands in the accelerated path - confirm against the JIT.
                        return new Vector4f (v1.x > v2.x ? v1.x : v2.x,
                                             v1.y > v2.y ? v1.y : v2.y,
                                             v1.z > v2.z ? v1.z : v2.z,
                                             v1.w > v2.w ? v1.w : v2.w);
                }

                /// <summary>
                /// Element-wise minimum. When the comparison is unordered (either element
                /// is NaN) or both elements compare equal, the element from
                /// <paramref name="v2"/> is returned.
                /// </summary>
                [Acceleration (AccelMode.SSE1)]
                public static Vector4f Min (Vector4f v1, Vector4f v2)
                {
                        // An ordered "less than" is false for NaN, so v2 wins in that case.
                        // This mirrors MINPS, which returns its second operand for NaN inputs
                        // and for two zeros; System.Math.Min would propagate NaN instead.
                        // NOTE(review): assumes v1/v2 map to the instruction's first/second
                        // operands in the accelerated path - confirm against the JIT.
                        return new Vector4f (v1.x < v2.x ? v1.x : v2.x,
                                             v1.y < v2.y ? v1.y : v2.y,
                                             v1.z < v2.z ? v1.z : v2.z,
                                             v1.w < v2.w ? v1.w : v2.w);
                }

                /// <summary>
                /// Adds adjacent pairs: (v1.x+v1.y, v1.z+v1.w, v2.x+v2.y, v2.z+v2.w).
                /// </summary>
                [Acceleration (AccelMode.SSE3)]
                public static Vector4f HorizontalAdd (Vector4f v1, Vector4f v2)
                {
                        return new Vector4f (v1.x + v1.y, v1.z + v1.w, v2.x + v2.y, v2.z + v2.w);
                }

                /// <summary>
                /// Alternates subtraction and addition:
                /// (v1.x-v2.x, v1.y+v2.y, v1.z-v2.z, v1.w+v2.w).
                /// </summary>
                [Acceleration (AccelMode.SSE3)]
                public static Vector4f AddSub (Vector4f v1, Vector4f v2)
                {
                        return new Vector4f (v1.x - v2.x, v1.y + v2.y, v1.z - v2.z, v1.w + v2.w);
                }

                /// <summary>
                /// Subtracts adjacent pairs: (v1.x-v1.y, v1.z-v1.w, v2.x-v2.y, v2.z-v2.w).
                /// </summary>
                [Acceleration (AccelMode.SSE3)]
                public static Vector4f HorizontalSub (Vector4f v1, Vector4f v2)
                {
                        return new Vector4f (v1.x - v1.y, v1.z - v1.w, v2.x - v2.y, v2.z - v2.w);
                }

                /// <summary>Interleaves the upper halves: (v1.z, v2.z, v1.w, v2.w).</summary>
                [Acceleration (AccelMode.SSE1)]
                public static Vector4f InterleaveHigh (Vector4f v1, Vector4f v2)
                {
                        return new Vector4f (v1.z, v2.z, v1.w, v2.w);
                }

                /// <summary>Interleaves the lower halves: (v1.x, v2.x, v1.y, v2.y).</summary>
                [Acceleration (AccelMode.SSE1)]
                public static Vector4f InterleaveLow (Vector4f v1, Vector4f v2)
                {
                        return new Vector4f (v1.x, v2.x, v1.y, v2.y);
                }

                /*
                The Compare* methods below return a mask vector: each element has all
                32 bits set (integer -1) where the predicate holds and all bits clear
                where it does not. The masks are meant for the bitwise operators.
                */

                /// <summary>Per-element mask for "v1 equals v2".</summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe Vector4f CompareEqual (Vector4f v1, Vector4f v2)
                {
                        Vector4f res = new Vector4f ();
                        int *c = (int*)&res;
                        *c++ = v1.x == v2.x ? -1 : 0;
                        *c++ = v1.y == v2.y ? -1 : 0;
                        *c++ = v1.z == v2.z ? -1 : 0;
                        *c = v1.w == v2.w ? -1 : 0;
                        return res;
                }

                /// <summary>Per-element mask for "v1 is less than v2".</summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe Vector4f CompareLessThan (Vector4f v1, Vector4f v2)
                {
                        Vector4f res = new Vector4f ();
                        int *c = (int*)&res;
                        *c++ = v1.x < v2.x ? -1 : 0;
                        *c++ = v1.y < v2.y ? -1 : 0;
                        *c++ = v1.z < v2.z ? -1 : 0;
                        *c = v1.w < v2.w ? -1 : 0;
                        return res;
                }

                /// <summary>Per-element mask for "v1 is less than or equal to v2".</summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe Vector4f CompareLessEqual (Vector4f v1, Vector4f v2)
                {
                        Vector4f res = new Vector4f ();
                        int *c = (int*)&res;
                        *c++ = v1.x <= v2.x ? -1 : 0;
                        *c++ = v1.y <= v2.y ? -1 : 0;
                        *c++ = v1.z <= v2.z ? -1 : 0;
                        *c = v1.w <= v2.w ? -1 : 0;
                        return res;
                }

                /// <summary>
                /// Per-element mask that is set when either element is NaN, i.e. the same
                /// as float.IsNaN (a) || float.IsNaN (b).
                /// </summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe Vector4f CompareUnordered (Vector4f v1, Vector4f v2)
                {
                        Vector4f res = new Vector4f ();
                        int *c = (int*)&res;
                        *c++ = float.IsNaN (v1.x) || float.IsNaN (v2.x) ? -1 : 0;
                        *c++ = float.IsNaN (v1.y) || float.IsNaN (v2.y) ? -1 : 0;
                        *c++ = float.IsNaN (v1.z) || float.IsNaN (v2.z) ? -1 : 0;
                        *c = float.IsNaN (v1.w) || float.IsNaN (v2.w) ? -1 : 0;
                        return res;
                }

                /// <summary>Per-element mask for "v1 is not equal to v2".</summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe Vector4f CompareNotEqual (Vector4f v1, Vector4f v2)
                {
                        Vector4f res = new Vector4f ();
                        int *c = (int*)&res;
                        *c++ = v1.x != v2.x ? -1 : 0;
                        *c++ = v1.y != v2.y ? -1 : 0;
                        *c++ = v1.z != v2.z ? -1 : 0;
                        *c = v1.w != v2.w ? -1 : 0;
                        return res;
                }

                /// <summary>
                /// Per-element mask for "not (v1 is less than v2)". Because the test is a
                /// negated ordered comparison, the mask is also set when either element
                /// is NaN.
                /// </summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe Vector4f CompareNotLessThan (Vector4f v1, Vector4f v2)
                {
                        Vector4f res = new Vector4f ();
                        int *c = (int*)&res;
                        *c++ = v1.x < v2.x ? 0 : -1;
                        *c++ = v1.y < v2.y ? 0 : -1;
                        *c++ = v1.z < v2.z ? 0 : -1;
                        *c = v1.w < v2.w ? 0 : -1;
                        return res;
                }

                /// <summary>
                /// Per-element mask for "not (v1 is less than or equal to v2)". Because the
                /// test is a negated ordered comparison, the mask is also set when either
                /// element is NaN.
                /// </summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe Vector4f CompareNotLessEqual (Vector4f v1, Vector4f v2)
                {
                        Vector4f res = new Vector4f ();
                        int *c = (int*)&res;
                        *c++ = v1.x <= v2.x ? 0 : -1;
                        *c++ = v1.y <= v2.y ? 0 : -1;
                        *c++ = v1.z <= v2.z ? 0 : -1;
                        *c = v1.w <= v2.w ? 0 : -1;
                        return res;
                }

                /// <summary>
                /// Per-element mask that is set when neither element is NaN, i.e. the same
                /// as !float.IsNaN (a) &amp;&amp; !float.IsNaN (b).
                /// </summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe Vector4f CompareOrdered (Vector4f v1, Vector4f v2)
                {
                        Vector4f res = new Vector4f ();
                        int *c = (int*)&res;
                        *c++ = !float.IsNaN (v1.x) && !float.IsNaN (v2.x) ? -1 : 0;
                        *c++ = !float.IsNaN (v1.y) && !float.IsNaN (v2.y) ? -1 : 0;
                        *c++ = !float.IsNaN (v1.z) && !float.IsNaN (v2.z) ? -1 : 0;
                        *c = !float.IsNaN (v1.w) && !float.IsNaN (v2.w) ? -1 : 0;
                        return res;
                }

                /// <summary>Duplicates the even-positioned elements: (x, x, z, z).</summary>
                [Acceleration (AccelMode.SSE3)]
                public static Vector4f DuplicateLow (Vector4f v1)
                {
                        return new Vector4f (v1.x, v1.x, v1.z, v1.z);
                }

                /// <summary>Duplicates the odd-positioned elements: (y, y, w, w).</summary>
                [Acceleration (AccelMode.SSE3)]
                public static Vector4f DuplicateHigh (Vector4f v1)
                {
                        return new Vector4f (v1.y, v1.y, v1.w, v1.w);
                }

                /// <summary>
                /// Rearranges the elements of <paramref name="v1"/>.
                /// </summary>
                /// <param name="v1">Source vector.</param>
                /// <param name="sel">
                /// A combination of ShuffleSel members. Bits 0-1 pick the source of the
                /// result's X, bits 2-3 of Y, bits 4-5 of Z and bits 6-7 of W; any higher
                /// bits are ignored.
                /// </param>
                [Acceleration (AccelMode.SSE2)]
                public static unsafe Vector4f Shuffle (Vector4f v1, ShuffleSel sel)
                {
                        float *ptr = (float*)&v1;
                        int idx = (int)sel;
                        // Each two-bit field is masked to 0..3, so every read stays inside v1.
                        return new Vector4f (*(ptr + ((idx >> 0) & 0x3)),
                                             *(ptr + ((idx >> 2) & 0x3)),
                                             *(ptr + ((idx >> 4) & 0x3)),
                                             *(ptr + ((idx >> 6) & 0x3)));
                }

                /*
                The explicit conversions below reinterpret the storage of the vector as
                another vector type by reading it through a pointer of the target type;
                no per-element numeric conversion takes place.
                NOTE(review): the target types are declared elsewhere; this assumes each
                of them is also 16 bytes wide - confirm.
                */

                /// <summary>Reinterprets the storage of <paramref name="v"/> as a Vector2d.</summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe explicit operator Vector2d (Vector4f v)
                {
                        Vector2d* p = (Vector2d*)&v;
                        return *p;
                }

                /// <summary>Reinterprets the storage of <paramref name="v"/> as a Vector2l.</summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe explicit operator Vector2l (Vector4f v)
                {
                        Vector2l* p = (Vector2l*)&v;
                        return *p;
                }

                /// <summary>Reinterprets the storage of <paramref name="v"/> as a Vector2ul.</summary>
                [Acceleration (AccelMode.SSE1)]
                [CLSCompliant(false)]
                public static unsafe explicit operator Vector2ul (Vector4f v)
                {
                        Vector2ul* p = (Vector2ul*)&v;
                        return *p;
                }

                /// <summary>Reinterprets the storage of <paramref name="v"/> as a Vector4i.</summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe explicit operator Vector4i (Vector4f v)
                {
                        Vector4i* p = (Vector4i*)&v;
                        return *p;
                }

                /// <summary>Reinterprets the storage of <paramref name="v"/> as a Vector4ui.</summary>
                [Acceleration (AccelMode.SSE1)]
                [CLSCompliant(false)]
                public static unsafe explicit operator Vector4ui (Vector4f v)
                {
                        Vector4ui* p = (Vector4ui*)&v;
                        return *p;
                }

                /// <summary>Reinterprets the storage of <paramref name="v"/> as a Vector8s.</summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe explicit operator Vector8s (Vector4f v)
                {
                        Vector8s* p = (Vector8s*)&v;
                        return *p;
                }

                /// <summary>Reinterprets the storage of <paramref name="v"/> as a Vector8us.</summary>
                [Acceleration (AccelMode.SSE1)]
                [CLSCompliant(false)]
                public static unsafe explicit operator Vector8us (Vector4f v)
                {
                        Vector8us* p = (Vector8us*)&v;
                        return *p;
                }

                /// <summary>Reinterprets the storage of <paramref name="v"/> as a Vector16sb.</summary>
                [Acceleration (AccelMode.SSE1)]
                [CLSCompliant(false)]
                public static unsafe explicit operator Vector16sb (Vector4f v)
                {
                        Vector16sb* p = (Vector16sb*)&v;
                        return *p;
                }

                /// <summary>Reinterprets the storage of <paramref name="v"/> as a Vector16b.</summary>
                [Acceleration (AccelMode.SSE1)]
                public static unsafe explicit operator Vector16b (Vector4f v)
                {
                        Vector16b* p = (Vector16b*)&v;
                        return *p;
                }

                /// <summary>
                /// Returns a copy of the referenced vector. This body performs no
                /// alignment check.
                /// </summary>
                [Acceleration (AccelMode.SSE1)]
                public static Vector4f LoadAligned (ref Vector4f v)
                {
                        return v;
                }

                /// <summary>
                /// Copies <paramref name="val"/> into <paramref name="res"/>. This body
                /// performs no alignment check.
                /// </summary>
                [Acceleration (AccelMode.SSE1)]
                public static void StoreAligned (ref Vector4f res, Vector4f val)
                {
                        res = val;
                }

                /// <summary>
                /// Returns a copy of the vector at <paramref name="v"/>. The pointer is
                /// dereferenced without null or alignment checks.
                /// </summary>
                [CLSCompliant(false)]
                [Acceleration (AccelMode.SSE1)]
                public static unsafe Vector4f LoadAligned (Vector4f *v)
                {
                        return *v;
                }

                /// <summary>
                /// Writes <paramref name="val"/> to the location at <paramref name="res"/>.
                /// The pointer is dereferenced without null or alignment checks.
                /// </summary>
                [CLSCompliant(false)]
                [Acceleration (AccelMode.SSE1)]
                public static unsafe void StoreAligned (Vector4f *res, Vector4f val)
                {
                        *res = val;
                }

                /*
                The Prefetch* methods have empty bodies in this file, so calling the
                managed implementation does nothing.
                NOTE(review): presumably the accelerated path emits the matching cache
                prefetch hint for the given location - confirm against the runtime.
                */

                /// <summary>Prefetch hint; this managed body is a no-op.</summary>
                [Acceleration (AccelMode.SSE1)]
                [CLSCompliant(false)]
                public static void PrefetchTemporalAllCacheLevels (ref Vector4f res)
                {
                }

                /// <summary>Prefetch hint; this managed body is a no-op.</summary>
                [Acceleration (AccelMode.SSE1)]
                [CLSCompliant(false)]
                public static void PrefetchTemporal1stLevelCache (ref Vector4f res)
                {
                }

                /// <summary>Prefetch hint; this managed body is a no-op.</summary>
                [Acceleration (AccelMode.SSE1)]
                [CLSCompliant(false)]
                public static void PrefetchTemporal2ndLevelCache (ref Vector4f res)
                {
                }

                /// <summary>Prefetch hint; this managed body is a no-op.</summary>
                [Acceleration (AccelMode.SSE1)]
                [CLSCompliant(false)]
                public static void PrefetchNonTemporal (ref Vector4f res)
                {
                }

                /// <summary>Prefetch hint; this managed body is a no-op.</summary>
                [Acceleration (AccelMode.SSE1)]
                [CLSCompliant(false)]
                public static unsafe void PrefetchTemporalAllCacheLevels (Vector4f *res)
                {
                }

                /// <summary>Prefetch hint; this managed body is a no-op.</summary>
                [Acceleration (AccelMode.SSE1)]
                [CLSCompliant(false)]
                public static unsafe void PrefetchTemporal1stLevelCache (Vector4f *res)
                {
                }

                /// <summary>Prefetch hint; this managed body is a no-op.</summary>
                [Acceleration (AccelMode.SSE1)]
                [CLSCompliant(false)]
                public static unsafe void PrefetchTemporal2ndLevelCache (Vector4f *res)
                {
                }

                /// <summary>Prefetch hint; this managed body is a no-op.</summary>
                [Acceleration (AccelMode.SSE1)]
                [CLSCompliant(false)]
                public static unsafe void PrefetchNonTemporal (Vector4f *res)
                {
                }
        }
}