4 // Rodrigo Kumpera (rkumpera@novell.com)
6 // (C) 2008 Novell, Inc. (http://www.novell.com)
8 // Permission is hereby granted, free of charge, to any person obtaining
9 // a copy of this software and associated documentation files (the
10 // "Software"), to deal in the Software without restriction, including
11 // without limitation the rights to use, copy, modify, merge, publish,
12 // distribute, sublicense, and/or sell copies of the Software, and to
13 // permit persons to whom the Software is furnished to do so, subject to
14 // the following conditions:
16 // The above copyright notice and this permission notice shall be
17 // included in all copies or substantial portions of the Software.
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 public static class VectorOperations
33 /* ==== Bitwise operations ==== */
35 [Acceleration (AccelMode.SSE1)]
36 public static unsafe Vector4f AndNot (this Vector4f v1, Vector4f v2)
38 Vector4f res = new Vector4f ();
49 [Acceleration (AccelMode.SSE2)]
50 public static unsafe Vector2d AndNot (this Vector2d v1, Vector2d v2)
52 Vector2d res = new Vector2d ();
63 [Acceleration (AccelMode.SSE2)]
64 [CLSCompliant (false)]
65 public static unsafe Vector4ui ArithmeticRightShift (this Vector4ui v1, int amount)
67 Vector4ui res = new Vector4ui ();
70 for (int i = 0; i < 4; ++i)
71 *b++ = (uint)((int)(*a++) >> amount);
75 [Acceleration (AccelMode.SSE2)]
76 [CLSCompliant (false)]
77 public static unsafe Vector8us ArithmeticRightShift (this Vector8us va, int amount)
79 Vector8us res = new Vector8us ();
82 for (int i = 0; i < 8; ++i)
83 *b++ = (ushort)((short)(*a++) >> amount);
/*
 * Shifts both elements of v1 right by amount bits without sign extension.
 * Each element is cast to ulong before the shift, so zeros are shifted in
 * from the left, and the result is cast back to long.
 * Note: C# masks the shift count of a 64-bit operand to its low 6 bits, so
 * an amount of 64 or more does not produce zero here.
 */
87 [Acceleration (AccelMode.SSE2)]
88 public static unsafe Vector2l LogicalRightShift (this Vector2l v1, int amount)
90 return new Vector2l ((long)((ulong)(v1.x) >> amount), (long)((ulong)(v1.y) >> amount));
93 [Acceleration (AccelMode.SSE2)]
94 public static unsafe Vector4i LogicalRightShift (this Vector4i v1, int amount)
96 Vector4i res = new Vector4i ();
99 for (int i = 0; i < 4; ++i)
100 *b++ = (int)((uint)(*a++) >> amount);
104 [Acceleration (AccelMode.SSE2)]
105 public static unsafe Vector8s LogicalRightShift (this Vector8s va, int amount)
107 Vector8s res = new Vector8s ();
110 for (int i = 0; i < 8; ++i)
111 *b++ = (short)((ushort)(*a++) >> amount);
115 [Acceleration (AccelMode.SSE2)]
116 [CLSCompliant (false)]
117 public static unsafe int ExtractByteMask (this Vector16sb va) {
119 sbyte *a = (sbyte*)&va;
120 for (int i = 0; i < 16; ++i)
121 res |= (*a++ & 0x80) >> 7 << i;
125 [Acceleration (AccelMode.SSE2)]
126 public static unsafe int ExtractByteMask (this Vector16b va) {
128 byte *a = (byte*)&va;
129 for (int i = 0; i < 16; ++i)
130 res |= (*a++ & 0x80) >> 7 << i;
135 /* ==== Math operations ==== */
137 [Acceleration (AccelMode.SSE2)]
138 public static unsafe Vector8s AddWithSaturation (this Vector8s va, Vector8s vb) {
139 Vector8s res = new Vector8s ();
143 for (int i = 0; i < 8; ++i)
144 *c++ = (short) System.Math.Max (System.Math.Min (*a++ + *b++, short.MaxValue), short.MinValue);
148 [Acceleration (AccelMode.SSE2)]
149 [CLSCompliant (false)]
150 public static unsafe Vector8us AddWithSaturation (this Vector8us va, Vector8us vb) {
151 Vector8us res = new Vector8us ();
155 for (int i = 0; i < 8; ++i)
156 *c++ = (ushort) System.Math.Min (*a++ + *b++, ushort.MaxValue);
160 [Acceleration (AccelMode.SSE2)]
161 [CLSCompliant (false)]
162 public static unsafe Vector16sb AddWithSaturation (this Vector16sb va, Vector16sb vb) {
163 Vector16sb res = new Vector16sb ();
167 for (int i = 0; i < 16; ++i)
168 *c++ = (sbyte) System.Math.Max (System.Math.Min (*a++ + *b++, sbyte.MaxValue), sbyte.MinValue);
172 [Acceleration (AccelMode.SSE2)]
173 public static unsafe Vector16b AddWithSaturation (this Vector16b va, Vector16b vb) {
174 Vector16b res = new Vector16b ();
178 for (int i = 0; i < 16; ++i)
179 *c++ = (byte) System.Math.Min (*a++ + *b++, byte.MaxValue);
183 [Acceleration (AccelMode.SSE2)]
184 public static unsafe Vector8s SubtractWithSaturation (this Vector8s va, Vector8s vb) {
185 Vector8s res = new Vector8s ();
189 for (int i = 0; i < 8; ++i)
190 *c++ = (short) System.Math.Max (System.Math.Min (*a++ - *b++, short.MaxValue), short.MinValue); ;
194 [Acceleration (AccelMode.SSE2)]
195 [CLSCompliant (false)]
196 public static unsafe Vector8us SubtractWithSaturation (this Vector8us va, Vector8us vb) {
197 Vector8us res = new Vector8us ();
201 for (int i = 0; i < 8; ++i)
202 *c++ = (ushort) System.Math.Max (*a++ - *b++, 0);
206 [Acceleration (AccelMode.SSE2)]
207 [CLSCompliant (false)]
208 public static unsafe Vector16sb SubtractWithSaturation (this Vector16sb va, Vector16sb vb) {
209 Vector16sb res = new Vector16sb ();
213 for (int i = 0; i < 16; ++i)
214 *c++ = (sbyte) System.Math.Max (System.Math.Min (*a++ - *b++, sbyte.MaxValue), sbyte.MinValue);
218 [Acceleration (AccelMode.SSE2)]
219 public static unsafe Vector16b SubtractWithSaturation (this Vector16b va, Vector16b vb) {
220 Vector16b res = new Vector16b ();
224 for (int i = 0; i < 16; ++i)
225 *c++ = (byte) System.Math.Max (*a++ - *b++, 0);
229 [Acceleration (AccelMode.SSE2)]
230 public static unsafe Vector8s MultiplyStoreHigh (this Vector8s va, Vector8s vb) {
231 Vector8s res = new Vector8s ();
235 for (int i = 0; i < 8; ++i)
236 *c++ = (short)((int)*a++ * (int)*b++ >> 16);
240 [Acceleration (AccelMode.SSE2)]
241 [CLSCompliant (false)]
242 public static unsafe Vector8us MultiplyStoreHigh (this Vector8us va, Vector8us vb) {
243 Vector8us res = new Vector8us ();
247 for (int i = 0; i < 8; ++i)
248 *c++ = (ushort)((uint)*a++ * (uint)*b++ >> 16);
252 [CLSCompliant(false)]
253 [Acceleration (AccelMode.SSE2)]
254 public static unsafe Vector8us SumOfAbsoluteDifferences (this Vector16b va, Vector16sb vb) {
255 Vector8us res = new Vector8us ();
257 sbyte *b = (sbyte*)&vb;
260 for (int i = 0; i < 8; ++i)
261 tmp += System.Math.Abs ((int)*a++ - (int)*b++);
262 res.V0 = (ushort)tmp;
265 for (int i = 0; i < 8; ++i)
266 tmp += System.Math.Abs ((int)*a++ - (int)*b++);
267 res.V4 = (ushort)tmp;
/*
 * Returns a vector holding the square root of each of the four elements
 * (x, y, z, w) of v1. System.Math.Sqrt works on double, so every result is
 * narrowed back to float before the new vector is built.
 */
272 [Acceleration (AccelMode.SSE1)]
273 public static Vector4f Sqrt (this Vector4f v1)
275 return new Vector4f ((float)System.Math.Sqrt ((float)v1.x),
276 (float)System.Math.Sqrt ((float)v1.y),
277 (float)System.Math.Sqrt ((float)v1.z),
278 (float)System.Math.Sqrt ((float)v1.w));
/*
 * Returns 1 / sqrt (e) for each of the four elements (x, y, z, w) of v1.
 * The division uses the double literal 1.0, so it is carried out in double
 * precision and only the final quotient is narrowed to float.
 */
281 [Acceleration (AccelMode.SSE1)]
282 public static Vector4f InvSqrt (this Vector4f v1)
284 return new Vector4f ((float)(1.0 / System.Math.Sqrt ((float)v1.x)),
285 (float)(1.0 / System.Math.Sqrt ((float)v1.y)),
286 (float)(1.0 / System.Math.Sqrt ((float)v1.z)),
287 (float)(1.0 / System.Math.Sqrt ((float)v1.w)));
/*
 * Returns 1.0f / e for each of the four elements (x, y, z, w) of v1.
 * No zero check is made on the divisor.
 */
290 [Acceleration (AccelMode.SSE1)]
291 public static Vector4f Reciprocal (this Vector4f v1)
293 return new Vector4f (1.0f / v1.x, 1.0f / v1.y, 1.0f / v1.z, 1.0f / v1.w);
/*
 * Returns a vector holding System.Math.Sqrt of each of the two elements
 * (x, y) of v1.
 */
296 [Acceleration (AccelMode.SSE2)]
297 public static Vector2d Sqrt (this Vector2d v1)
299 return new Vector2d (System.Math.Sqrt (v1.x),
300 System.Math.Sqrt (v1.y));
303 [Acceleration (AccelMode.SSE2)]
304 [CLSCompliant (false)]
305 public static unsafe Vector8us Average (this Vector8us va, Vector8us vb) {
306 Vector8us res = new Vector8us ();
310 for (int i = 0; i < 8; ++i)
311 *c++ = (ushort) ((*a++ + *b++ + 1) >> 1);
315 [Acceleration (AccelMode.SSE2)]
316 public static unsafe Vector16b Average (this Vector16b va, Vector16b vb) {
317 Vector16b res = new Vector16b ();
321 for (int i = 0; i < 16; ++i)
322 *c++ = (byte) ((*a++ + *b++ + 1) >> 1);
/*
 * Element-wise maximum: each of x, y, z and w in the result is
 * System.Math.Max of the matching elements of v1 and v2.
 * Note: System.Math.Max returns NaN when either operand is NaN.
 */
326 [Acceleration (AccelMode.SSE1)]
327 public static Vector4f Max (this Vector4f v1, Vector4f v2)
329 return new Vector4f (System.Math.Max (v1.x, v2.x),
330 System.Math.Max (v1.y, v2.y),
331 System.Math.Max (v1.z, v2.z),
332 System.Math.Max (v1.w, v2.w));
/*
 * Element-wise maximum: x and y of the result are System.Math.Max of the
 * matching elements of v1 and v2.
 * Note: System.Math.Max returns NaN when either operand is NaN.
 */
335 [Acceleration (AccelMode.SSE2)]
336 public static Vector2d Max (this Vector2d v1, Vector2d v2)
338 return new Vector2d (System.Math.Max (v1.x, v2.x),
339 System.Math.Max (v1.y, v2.y));
/*
 * Element-wise maximum: each of x, y, z and w in the result is
 * System.Math.Max of the matching elements of v1 and v2.
 */
342 [Acceleration (AccelMode.SSE41)]
343 public static Vector4i Max (this Vector4i v1, Vector4i v2)
345 return new Vector4i (System.Math.Max (v1.x, v2.x), System.Math.Max (v1.y, v2.y), System.Math.Max (v1.z, v2.z), System.Math.Max (v1.w, v2.w));
/*
 * Element-wise maximum: each of x, y, z and w in the result is
 * System.Math.Max of the matching elements of v1 and v2.
 * Marked non-CLS-compliant, like the other overloads on this vector type.
 */
348 [Acceleration (AccelMode.SSE41)]
349 [CLSCompliant (false)]
350 public static Vector4ui Max (this Vector4ui v1, Vector4ui v2)
352 return new Vector4ui (System.Math.Max (v1.x, v2.x), System.Math.Max (v1.y, v2.y), System.Math.Max (v1.z, v2.z), System.Math.Max (v1.w, v2.w));
355 [Acceleration (AccelMode.SSE2)]
356 public static unsafe Vector8s Max (this Vector8s va, Vector8s vb) {
357 Vector8s res = new Vector8s ();
361 for (int i = 0; i < 8; ++i)
362 *c++ = (short) System.Math.Max (*a++, *b++);
366 [Acceleration (AccelMode.SSE41)]
367 [CLSCompliant (false)]
368 public static unsafe Vector8us Max (this Vector8us va, Vector8us vb) {
369 Vector8us res = new Vector8us ();
373 for (int i = 0; i < 8; ++i)
374 *c++ = (ushort) System.Math.Max (*a++, *b++);
/*
 * Element-wise minimum: each of x, y, z and w in the result is
 * System.Math.Min of the matching elements of v1 and v2.
 * Note: System.Math.Min returns NaN when either operand is NaN.
 */
378 [Acceleration (AccelMode.SSE1)]
379 public static Vector4f Min (this Vector4f v1, Vector4f v2)
381 return new Vector4f (System.Math.Min (v1.x, v2.x),
382 System.Math.Min (v1.y, v2.y),
383 System.Math.Min (v1.z, v2.z),
384 System.Math.Min (v1.w, v2.w));
387 [Acceleration (AccelMode.SSE41)]
388 [CLSCompliant (false)]
389 public static unsafe Vector16sb Max (this Vector16sb va, Vector16sb vb) {
390 Vector16sb res = new Vector16sb ();
394 for (int i = 0; i < 16; ++i)
395 *c++ = (sbyte) System.Math.Max (*a++, *b++);
399 [Acceleration (AccelMode.SSE2)]
400 public static unsafe Vector16b Max (this Vector16b va, Vector16b vb) {
401 Vector16b res = new Vector16b ();
405 for (int i = 0; i < 16; ++i)
406 *c++ = (byte) System.Math.Max(*a++, *b++);
/*
 * Element-wise minimum: x and y of the result are System.Math.Min of the
 * matching elements of v1 and v2.
 * Note: System.Math.Min returns NaN when either operand is NaN.
 */
410 [Acceleration (AccelMode.SSE2)]
411 public static Vector2d Min (this Vector2d v1, Vector2d v2)
413 return new Vector2d (System.Math.Min (v1.x, v2.x),
414 System.Math.Min (v1.y, v2.y));
/*
 * Element-wise minimum: each of x, y, z and w in the result is
 * System.Math.Min of the matching elements of v1 and v2.
 */
417 [Acceleration (AccelMode.SSE41)]
418 public static Vector4i Min (this Vector4i v1, Vector4i v2)
420 return new Vector4i (System.Math.Min (v1.x, v2.x), System.Math.Min (v1.y, v2.y), System.Math.Min (v1.z, v2.z), System.Math.Min (v1.w, v2.w));
/*
 * Element-wise minimum: each of x, y, z and w in the result is
 * System.Math.Min of the matching elements of v1 and v2.
 * Marked non-CLS-compliant, like the other overloads on this vector type.
 */
423 [Acceleration (AccelMode.SSE41)]
424 [CLSCompliant (false)]
425 public static Vector4ui Min (this Vector4ui v1, Vector4ui v2)
427 return new Vector4ui (System.Math.Min (v1.x, v2.x), System.Math.Min (v1.y, v2.y), System.Math.Min (v1.z, v2.z), System.Math.Min (v1.w, v2.w));
430 [Acceleration (AccelMode.SSE2)]
431 public static unsafe Vector8s Min (this Vector8s va, Vector8s vb) {
432 Vector8s res = new Vector8s ();
436 for (int i = 0; i < 8; ++i)
437 *c++ = (short) System.Math.Min (*a++, *b++);
441 [Acceleration (AccelMode.SSE41)]
442 [CLSCompliant (false)]
443 public static unsafe Vector8us Min (this Vector8us va, Vector8us vb) {
444 Vector8us res = new Vector8us ();
448 for (int i = 0; i < 8; ++i)
449 *c++ = (ushort) System.Math.Min (*a++, *b++);
453 [Acceleration (AccelMode.SSE41)]
454 [CLSCompliant (false)]
455 public static unsafe Vector16sb Min (this Vector16sb va, Vector16sb vb) {
456 Vector16sb res = new Vector16sb ();
460 for (int i = 0; i < 16; ++i)
461 *c++ = (sbyte) System.Math.Min(*a++, *b++);
465 [Acceleration (AccelMode.SSE2)]
466 public static unsafe Vector16b Min (this Vector16b va, Vector16b vb) {
467 Vector16b res = new Vector16b ();
471 for (int i = 0; i < 16; ++i)
472 *c++ = (byte) System.Math.Min(*a++, *b++);
477 /* ==== Horizontal operations ==== */
/*
 * Adds neighbouring elements inside each operand. The result is
 * (v1.x + v1.y, v1.z + v1.w, v2.x + v2.y, v2.z + v2.w): the first two
 * elements come from v1 and the last two from v2.
 */
479 [Acceleration (AccelMode.SSE3)]
480 public static Vector4f HorizontalAdd (this Vector4f v1, Vector4f v2)
482 return new Vector4f (v1.x + v1.y, v1.z + v1.w, v2.x + v2.y, v2.z + v2.w);
/*
 * Adds the two elements inside each operand. The result is
 * (v1.x + v1.y, v2.x + v2.y).
 */
484 [Acceleration (AccelMode.SSE3)]
485 public static Vector2d HorizontalAdd (this Vector2d v1, Vector2d v2)
487 return new Vector2d (v1.x + v1.y, v2.x + v2.y);
/*
 * Subtracts neighbouring elements inside each operand (first minus second).
 * The result is (v1.x - v1.y, v1.z - v1.w, v2.x - v2.y, v2.z - v2.w).
 */
490 [Acceleration (AccelMode.SSE3)]
491 public static Vector4f HorizontalSub (this Vector4f v1, Vector4f v2)
493 return new Vector4f (v1.x - v1.y, v1.z - v1.w, v2.x - v2.y, v2.z - v2.w);
/*
 * Subtracts the two elements inside each operand (x minus y).
 * The result is (v1.x - v1.y, v2.x - v2.y).
 */
496 [Acceleration (AccelMode.SSE3)]
497 public static Vector2d HorizontalSub (this Vector2d v1, Vector2d v2)
499 return new Vector2d (v1.x - v1.y, v2.x - v2.y);
/*
 * Alternates subtraction and addition across the elements: x and z of the
 * result are v1 - v2, while y and w are v1 + v2.
 */
502 [Acceleration (AccelMode.SSE3)]
503 public static Vector4f AddSub (this Vector4f v1, Vector4f v2)
505 return new Vector4f (v1.x - v2.x, v1.y + v2.y, v1.z - v2.z, v1.w + v2.w);
/*
 * Subtracts in the first element and adds in the second: the result is
 * (v1.x - v2.x, v1.y + v2.y).
 */
508 [Acceleration (AccelMode.SSE3)]
509 public static Vector2d AddSub (this Vector2d v1, Vector2d v2)
511 return new Vector2d (v1.x - v2.x, v1.y + v2.y);
514 /* ==== Compare methods ==== */
517 [Acceleration (AccelMode.SSE1)]
518 public unsafe static Vector4f CompareEqual (this Vector4f v1, Vector4f v2)
520 Vector4f res = new Vector4f ();
522 *c++ = v1.x == v2.x ? -1 : 0;
523 *c++ = v1.y == v2.y ? -1 : 0;
524 *c++ = v1.z == v2.z ? -1 : 0;
525 *c = v1.w == v2.w ? -1 : 0;
530 [Acceleration (AccelMode.SSE2)]
531 public unsafe static Vector2d CompareEqual (this Vector2d v1, Vector2d v2)
533 Vector2d res = new Vector2d ();
534 long *c = (long*)&res;
535 *c++ = v1.x == v2.x ? -1 : 0;
536 *c = v1.y == v2.y ? -1 : 0;
/*
 * Element-wise equality mask: each result element is -1 (all bits set)
 * where the matching elements of v1 and v2 are equal, and 0 otherwise.
 */
540 [Acceleration (AccelMode.SSE41)]
541 public static Vector2l CompareEqual (this Vector2l v1, Vector2l v2)
543 return new Vector2l ((long)(v1.x == v2.x ? -1 : 0), (long)(v1.y == v2.y ? -1 : 0));
/*
 * Element-wise equality mask: each result element is (ulong)-1 where the
 * matching elements of v1 and v2 are equal, and 0 otherwise.
 * The int -1 is converted to ulong at run time; this yields all bits set
 * only in an unchecked context (assumed to be the build default here).
 */
546 [Acceleration (AccelMode.SSE41)]
547 [CLSCompliant (false)]
548 public static Vector2ul CompareEqual (this Vector2ul v1, Vector2ul v2)
550 return new Vector2ul ((ulong)(v1.x == v2.x ? -1 : 0), (ulong)(v1.y == v2.y ? -1 : 0));
/*
 * Element-wise equality mask: each of x, y, z and w in the result is -1
 * (all bits set) where v1 and v2 hold equal values, and 0 otherwise.
 */
553 [Acceleration (AccelMode.SSE2)]
554 public static Vector4i CompareEqual (this Vector4i v1, Vector4i v2)
556 return new Vector4i ((int)(v1.x == v2.x ? -1 : 0), (int)(v1.y == v2.y ? -1 : 0), (int)(v1.z == v2.z ? -1 : 0), (int)(v1.w == v2.w ? -1 : 0));
/*
 * Element-wise equality mask: each of x, y, z and w in the result is
 * (uint)-1 where v1 and v2 hold equal values, and 0 otherwise.
 * The int -1 is converted to uint at run time; this yields all bits set
 * only in an unchecked context (assumed to be the build default here).
 */
559 [Acceleration (AccelMode.SSE2)]
560 [CLSCompliant (false)]
561 public static Vector4ui CompareEqual (this Vector4ui v1, Vector4ui v2)
563 return new Vector4ui ((uint)(v1.x == v2.x ? -1 : 0), (uint)(v1.y == v2.y ? -1 : 0), (uint)(v1.z == v2.z ? -1 : 0), (uint)(v1.w == v2.w ? -1 : 0));
566 [Acceleration (AccelMode.SSE2)]
567 public static unsafe Vector8s CompareEqual (this Vector8s va, Vector8s vb) {
568 Vector8s res = new Vector8s ();
572 for (int i = 0; i < 8; ++i)
573 *c++ = (short) (*a++ == *b++ ? -1 : 0);
577 [Acceleration (AccelMode.SSE2)]
578 [CLSCompliant (false)]
579 public static unsafe Vector8us CompareEqual (this Vector8us va, Vector8us vb) {
580 Vector8us res = new Vector8us ();
584 for (int i = 0; i < 8; ++i)
585 *c++ = (ushort) (*a++ == *b++ ? -1 : 0);
589 [Acceleration (AccelMode.SSE2)]
590 [CLSCompliant (false)]
591 public static unsafe Vector16sb CompareEqual (this Vector16sb va, Vector16sb vb) {
592 Vector16sb res = new Vector16sb ();
596 for (int i = 0; i < 16; ++i)
597 *c++ = (sbyte) (*a++ == *b++ ? -1 : 0);
601 [Acceleration (AccelMode.SSE2)]
602 public static unsafe Vector16b CompareEqual (this Vector16b va, Vector16b vb) {
603 Vector16b res = new Vector16b ();
607 for (int i = 0; i < 16; ++i)
608 *c++ = (byte) (*a++ == *b++ ? -1 : 0);
613 [Acceleration (AccelMode.SSE1)]
614 public unsafe static Vector4f CompareLessThan (this Vector4f v1, Vector4f v2)
616 Vector4f res = new Vector4f ();
618 *c++ = v1.x < v2.x ? -1 : 0;
619 *c++ = v1.y < v2.y ? -1 : 0;
620 *c++ = v1.z < v2.z ? -1 : 0;
621 *c = v1.w < v2.w ? -1 : 0;
626 [Acceleration (AccelMode.SSE2)]
627 public unsafe static Vector2d CompareLessThan (this Vector2d v1, Vector2d v2)
629 Vector2d res = new Vector2d ();
630 long *c = (long*)&res;
631 *c++ = v1.x < v2.x ? -1 : 0;
632 *c = v1.y < v2.y ? -1 : 0;
637 [Acceleration (AccelMode.SSE1)]
638 public unsafe static Vector4f CompareLessEqual (this Vector4f v1, Vector4f v2)
640 Vector4f res = new Vector4f ();
642 *c++ = v1.x <= v2.x ? -1 : 0;
643 *c++ = v1.y <= v2.y ? -1 : 0;
644 *c++ = v1.z <= v2.z ? -1 : 0;
645 *c = v1.w <= v2.w ? -1 : 0;
650 [Acceleration (AccelMode.SSE2)]
651 public unsafe static Vector2d CompareLessEqual (this Vector2d v1, Vector2d v2)
653 Vector2d res = new Vector2d ();
654 long *c = (long*)&res;
655 *c++ = v1.x <= v2.x ? -1 : 0;
656 *c = v1.y <= v2.y ? -1 : 0;
/*
 * Element-wise greater-than mask: each result element is -1 (all bits set)
 * where the element of v1 is greater than the matching element of v2,
 * and 0 otherwise.
 */
660 [Acceleration (AccelMode.SSE42)]
661 public static Vector2l CompareGreaterThan (this Vector2l v1, Vector2l v2)
663 return new Vector2l ((long)(v1.x > v2.x ? -1 : 0), (long)(v1.y > v2.y ? -1 : 0));
/*
 * Element-wise greater-than mask: each of x, y, z and w in the result is -1
 * (all bits set) where the element of v1 is greater than the matching
 * element of v2, and 0 otherwise.
 */
666 [Acceleration (AccelMode.SSE2)]
667 public static Vector4i CompareGreaterThan (this Vector4i v1, Vector4i v2)
669 return new Vector4i ((int)(v1.x > v2.x ? -1 : 0), (int)(v1.y > v2.y ? -1 : 0), (int)(v1.z > v2.z ? -1 : 0), (int)(v1.w > v2.w ? -1 : 0));
672 [Acceleration (AccelMode.SSE2)]
673 public static unsafe Vector8s CompareGreaterThan (this Vector8s va, Vector8s vb) {
674 Vector8s res = new Vector8s ();
678 for (int i = 0; i < 8; ++i)
679 *c++ = (short) (*a++ > *b++ ? -1 : 0);
683 [Acceleration (AccelMode.SSE2)]
684 [CLSCompliant (false)]
685 public static unsafe Vector16sb CompareGreaterThan (this Vector16sb va, Vector16sb vb) {
686 Vector16sb res = new Vector16sb ();
690 for (int i = 0; i < 16; ++i)
691 *c++ = (sbyte) (*a++ > *b++ ? -1 : 0);
695 /*Same as float.IsNaN (a) || float.IsNaN (b). */
696 [Acceleration (AccelMode.SSE1)]
697 public unsafe static Vector4f CompareUnordered (this Vector4f v1, Vector4f v2)
699 Vector4f res = new Vector4f ();
701 *c++ = float.IsNaN (v1.x) || float.IsNaN (v2.x) ? -1 : 0;
702 *c++ = float.IsNaN (v1.y) || float.IsNaN (v2.y) ? -1 : 0;
703 *c++ = float.IsNaN (v1.z) || float.IsNaN (v2.z) ? -1 : 0;
704 *c = float.IsNaN (v1.w) || float.IsNaN (v2.w) ? -1 : 0;
708 /*Same as double.IsNaN (a) || double.IsNaN (b). */
709 [Acceleration (AccelMode.SSE2)]
710 public unsafe static Vector2d CompareUnordered (this Vector2d v1, Vector2d v2)
712 Vector2d res = new Vector2d ();
713 long *c = (long*)&res;
714 *c++ = double.IsNaN (v1.x) || double.IsNaN (v2.x) ? -1 : 0;
715 *c = double.IsNaN (v1.y) || double.IsNaN (v2.y) ? -1 : 0;
720 [Acceleration (AccelMode.SSE1)]
721 public unsafe static Vector4f CompareNotEqual (this Vector4f v1, Vector4f v2)
723 Vector4f res = new Vector4f ();
725 *c++ = v1.x != v2.x ? -1 : 0;
726 *c++ = v1.y != v2.y ? -1 : 0;
727 *c++ = v1.z != v2.z ? -1 : 0;
728 *c = v1.w != v2.w ? -1 : 0;
733 [Acceleration (AccelMode.SSE2)]
734 public unsafe static Vector2d CompareNotEqual (this Vector2d v1, Vector2d v2)
736 Vector2d res = new Vector2d ();
737 long *c = (long*)&res;
738 *c++ = v1.x != v2.x ? -1 : 0;
739 *c = v1.y != v2.y ? -1 : 0;
743 /*Same as !(a < b). */
744 [Acceleration (AccelMode.SSE1)]
745 public unsafe static Vector4f CompareNotLessThan (this Vector4f v1, Vector4f v2)
747 Vector4f res = new Vector4f ();
749 *c++ = v1.x < v2.x ? 0 : -1;
750 *c++ = v1.y < v2.y ? 0 : -1;
751 *c++ = v1.z < v2.z ? 0 : -1;
752 *c = v1.w < v2.w ? 0 : -1;
756 /*Same as !(a < b). */
757 [Acceleration (AccelMode.SSE2)]
758 public unsafe static Vector2d CompareNotLessThan (this Vector2d v1, Vector2d v2)
760 Vector2d res = new Vector2d ();
761 long *c = (long*)&res;
762 *c++ = v1.x < v2.x ? 0 : -1;
763 *c = v1.y < v2.y ? 0 : -1;
767 /*Same as !(a <= b). */
768 [Acceleration (AccelMode.SSE1)]
769 public unsafe static Vector4f CompareNotLessEqual (this Vector4f v1, Vector4f v2)
771 Vector4f res = new Vector4f ();
773 *c++ = v1.x <= v2.x ? 0 : -1;
774 *c++ = v1.y <= v2.y ? 0 : -1;
775 *c++ = v1.z <= v2.z ? 0 : -1;
776 *c = v1.w <= v2.w ? 0 : -1;
780 /*Same as !(a <= b). */
781 [Acceleration (AccelMode.SSE2)]
782 public unsafe static Vector2d CompareNotLessEqual (this Vector2d v1, Vector2d v2)
784 Vector2d res = new Vector2d ();
785 long *c = (long*)&res;
786 *c++ = v1.x <= v2.x ? 0 : -1;
787 *c = v1.y <= v2.y ? 0 : -1;
791 /*Same as !float.IsNaN (a) && !float.IsNaN (b). */
792 [Acceleration (AccelMode.SSE1)]
793 public unsafe static Vector4f CompareOrdered (this Vector4f v1, Vector4f v2)
795 Vector4f res = new Vector4f ();
797 *c++ = !float.IsNaN (v1.x) && !float.IsNaN (v2.x) ? -1 : 0;
798 *c++ = !float.IsNaN (v1.y) && !float.IsNaN (v2.y) ? -1 : 0;
799 *c++ = !float.IsNaN (v1.z) && !float.IsNaN (v2.z) ? -1 : 0;
800 *c = !float.IsNaN (v1.w) && !float.IsNaN (v2.w) ? -1 : 0;
804 /*Same as !double.IsNaN (a) && !double.IsNaN (b). */
805 [Acceleration (AccelMode.SSE2)]
806 public unsafe static Vector2d CompareOrdered (this Vector2d v1, Vector2d v2)
808 Vector2d res = new Vector2d ();
809 long *c = (long*)&res;
810 *c++ = !double.IsNaN (v1.x) && !double.IsNaN (v2.x) ? -1 : 0;
811 *c = !double.IsNaN (v1.y) && !double.IsNaN (v2.y) ? -1 : 0;
816 /* ==== Data shuffling ==== */
/*
 * Interleaves the z and w elements of the two operands.
 * The result is (v1.z, v2.z, v1.w, v2.w).
 */
818 [Acceleration (AccelMode.SSE1)]
819 public static Vector4f InterleaveHigh (this Vector4f v1, Vector4f v2)
821 return new Vector4f (v1.z, v2.z, v1.w, v2.w);
/*
 * Pairs the y elements of the two operands. The result is (v1.y, v2.y).
 */
824 [Acceleration (AccelMode.SSE2)]
825 public static Vector2d InterleaveHigh (this Vector2d v1, Vector2d v2)
827 return new Vector2d (v1.y, v2.y);
/*
 * Interleaves the x and y elements of the two operands.
 * The result is (v1.x, v2.x, v1.y, v2.y).
 */
830 [Acceleration (AccelMode.SSE1)]
831 public static Vector4f InterleaveLow (this Vector4f v1, Vector4f v2)
833 return new Vector4f (v1.x, v2.x, v1.y, v2.y);
/*
 * Pairs the x elements of the two operands. The result is (v1.x, v2.x).
 */
836 [Acceleration (AccelMode.SSE2)]
837 public static Vector2d InterleaveLow (this Vector2d v1, Vector2d v2)
839 return new Vector2d (v1.x, v2.x);
/*
 * Copies the x element of v1 into both positions. The result is
 * (v1.x, v1.x); v1.y is not used.
 */
842 [Acceleration (AccelMode.SSE3)]
843 public static Vector2d Duplicate (this Vector2d v1)
845 return new Vector2d (v1.x, v1.x);
/*
 * Duplicates the first element of each pair. The result is
 * (v1.x, v1.x, v1.z, v1.z); y and w are not used.
 */
848 [Acceleration (AccelMode.SSE3)]
849 public static Vector4f DuplicateLow (this Vector4f v1)
851 return new Vector4f (v1.x, v1.x, v1.z, v1.z);
/*
 * Duplicates the second element of each pair. The result is
 * (v1.y, v1.y, v1.w, v1.w); x and z are not used.
 */
854 [Acceleration (AccelMode.SSE3)]
855 public static Vector4f DuplicateHigh (this Vector4f v1)
857 return new Vector4f (v1.y, v1.y, v1.w, v1.w);
/*
 * Pairs the x elements of the two operands. The result is (v1.x, v2.x).
 */
860 [Acceleration (AccelMode.SSE2)]
861 public static Vector2l UnpackLow (this Vector2l v1, Vector2l v2)
863 return new Vector2l (v1.x, v2.x);
/*
 * Pairs the x elements of the two operands. The result is (v1.x, v2.x).
 */
866 [Acceleration (AccelMode.SSE2)]
867 [CLSCompliant (false)]
868 public static Vector2ul UnpackLow (this Vector2ul v1, Vector2ul v2)
870 return new Vector2ul (v1.x, v2.x);
/*
 * Interleaves the x and y elements of the two operands.
 * The result is (v1.x, v2.x, v1.y, v2.y).
 */
873 [Acceleration (AccelMode.SSE2)]
874 public static Vector4i UnpackLow (this Vector4i v1, Vector4i v2)
876 return new Vector4i (v1.x, v2.x, v1.y, v2.y);
/*
 * Interleaves the x and y elements of the two operands.
 * The result is (v1.x, v2.x, v1.y, v2.y).
 */
879 [Acceleration (AccelMode.SSE2)]
880 [CLSCompliant (false)]
881 public static Vector4ui UnpackLow (this Vector4ui v1, Vector4ui v2)
883 return new Vector4ui (v1.x, v2.x, v1.y, v2.y);
/*
 * Interleaves elements v0..v3 of the two operands. The result is
 * (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3);
 * elements v4..v7 are not used.
 */
886 [Acceleration (AccelMode.SSE2)]
887 public static unsafe Vector8s UnpackLow (this Vector8s va, Vector8s vb)
889 return new Vector8s (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3);
/*
 * Interleaves elements v0..v3 of the two operands. The result is
 * (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3);
 * elements v4..v7 are not used.
 */
892 [Acceleration (AccelMode.SSE2)]
893 [CLSCompliant (false)]
894 public static unsafe Vector8us UnpackLow (this Vector8us va, Vector8us vb)
896 return new Vector8us (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3);
/*
 * Interleaves elements v0..v7 of the two operands. The result is
 * (va.v0, vb.v0, va.v1, vb.v1, ..., va.v7, vb.v7);
 * elements v8..v15 are not used.
 */
899 [Acceleration (AccelMode.SSE2)]
900 [CLSCompliant (false)]
901 public static unsafe Vector16sb UnpackLow (this Vector16sb va, Vector16sb vb)
903 return new Vector16sb (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3, va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
/*
 * Interleaves elements v0..v7 of the two operands. The result is
 * (va.v0, vb.v0, va.v1, vb.v1, ..., va.v7, vb.v7);
 * elements v8..v15 are not used.
 */
906 [Acceleration (AccelMode.SSE2)]
907 public static unsafe Vector16b UnpackLow (this Vector16b va, Vector16b vb)
909 return new Vector16b (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3, va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
/*
 * Pairs the y elements of the two operands. The result is (v1.y, v2.y).
 */
912 [Acceleration (AccelMode.SSE2)]
913 public static Vector2l UnpackHigh (this Vector2l v1, Vector2l v2)
915 return new Vector2l (v1.y, v2.y);
/*
 * Pairs the y elements of the two operands. The result is (v1.y, v2.y).
 */
918 [Acceleration (AccelMode.SSE2)]
919 [CLSCompliant (false)]
920 public static Vector2ul UnpackHigh (this Vector2ul v1, Vector2ul v2)
922 return new Vector2ul (v1.y, v2.y);
/*
 * Interleaves the z and w elements of the two operands.
 * The result is (v1.z, v2.z, v1.w, v2.w).
 */
925 [Acceleration (AccelMode.SSE2)]
926 public static Vector4i UnpackHigh (this Vector4i v1, Vector4i v2)
928 return new Vector4i (v1.z, v2.z, v1.w, v2.w);
/*
 * Interleaves the z and w elements of the two operands.
 * The result is (v1.z, v2.z, v1.w, v2.w).
 */
931 [Acceleration (AccelMode.SSE2)]
932 [CLSCompliant (false)]
933 public static Vector4ui UnpackHigh (this Vector4ui v1, Vector4ui v2)
935 return new Vector4ui (v1.z, v2.z, v1.w, v2.w);
/*
 * Interleaves elements v4..v7 of the two operands. The result is
 * (va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
 * elements v0..v3 are not used.
 */
938 [Acceleration (AccelMode.SSE2)]
939 public static unsafe Vector8s UnpackHigh (this Vector8s va, Vector8s vb)
941 return new Vector8s (va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
/*
 * Interleaves elements v4..v7 of the two operands. The result is
 * (va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
 * elements v0..v3 are not used.
 */
944 [Acceleration (AccelMode.SSE2)]
945 [CLSCompliant (false)]
946 public static unsafe Vector8us UnpackHigh (this Vector8us va, Vector8us vb)
948 return new Vector8us (va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
/*
 * Interleaves elements v8..v15 of the two operands. The result is
 * (va.v8, vb.v8, va.v9, vb.v9, ..., va.v15, vb.v15);
 * elements v0..v7 are not used.
 */
951 [Acceleration (AccelMode.SSE2)]
952 [CLSCompliant (false)]
953 public static unsafe Vector16sb UnpackHigh (this Vector16sb va, Vector16sb vb)
955 return new Vector16sb (va.v8, vb.v8, va.v9, vb.v9, va.v10, vb.v10, va.v11, vb.v11, va.v12, vb.v12, va.v13, vb.v13, va.v14, vb.v14, va.v15, vb.v15);
/*
 * Interleaves elements v8..v15 of the two operands. The result is
 * (va.v8, vb.v8, va.v9, vb.v9, ..., va.v15, vb.v15);
 * elements v0..v7 are not used.
 */
958 [Acceleration (AccelMode.SSE2)]
959 public static unsafe Vector16b UnpackHigh (this Vector16b va, Vector16b vb)
961 return new Vector16b (va.v8, vb.v8, va.v9, vb.v9, va.v10, vb.v10, va.v11, vb.v11, va.v12, vb.v12, va.v13, vb.v13, va.v14, vb.v14, va.v15, vb.v15);
964 [Acceleration (AccelMode.SSE2)]
965 public static unsafe Vector4f Shuffle (this Vector4f v1, ShuffleSel sel)
967 float *ptr = (float*)&v1;
969 return new Vector4f (*(ptr + ((idx >> 0) & 0x3)),*(ptr + ((idx >> 2) & 0x3)),*(ptr + ((idx >> 4) & 0x3)),*(ptr + ((idx >> 6) & 0x3)));
972 [Acceleration (AccelMode.SSE2)]
973 public static unsafe Vector4i Shuffle (this Vector4i v1, ShuffleSel sel)
975 int *ptr = (int*)&v1;
977 return new Vector4i (*(ptr + ((idx >> 0) & 0x3)),*(ptr + ((idx >> 2) & 0x3)),*(ptr + ((idx >> 4) & 0x3)),*(ptr + ((idx >> 6) & 0x3)));
980 [Acceleration (AccelMode.SSE2)]
981 [CLSCompliant (false)]
982 public static unsafe Vector4ui Shuffle (this Vector4ui v1, ShuffleSel sel)
984 uint *ptr = (uint*)&v1;
986 return new Vector4ui (*(ptr + ((idx >> 0) & 0x3)),*(ptr + ((idx >> 2) & 0x3)),*(ptr + ((idx >> 4) & 0x3)),*(ptr + ((idx >> 6) & 0x3)));
989 [Acceleration (AccelMode.SSE2)]
990 public static unsafe Vector8s ShuffleHigh (this Vector8s va, ShuffleSel sel)
992 short *ptr = ((short*)&va) + 4;
994 return new Vector8s (va.v0, va.v1, va.v2, va.v3, *(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)));
997 [Acceleration (AccelMode.SSE2)]
998 [CLSCompliant (false)]
999 public static unsafe Vector8us ShuffleHigh (this Vector8us va, ShuffleSel sel)
1001 ushort *ptr = ((ushort*)&va) + 4;
1003 return new Vector8us (va.v0, va.v1, va.v2, va.v3, *(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)));
1006 [Acceleration (AccelMode.SSE2)]
1007 public static unsafe Vector8s ShuffleLow (this Vector8s va, ShuffleSel sel)
1009 short *ptr = ((short*)&va);
1011 return new Vector8s (*(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)), va.v4, va.v5, va.v6, va.v7);
1014 [Acceleration (AccelMode.SSE2)]
1015 [CLSCompliant (false)]
1016 public static unsafe Vector8us ShuffleLow (this Vector8us va, ShuffleSel sel)
1018 ushort *ptr = ((ushort*)&va);
1020 return new Vector8us (*(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)), va.v4, va.v5, va.v6, va.v7);
1023 [CLSCompliant(false)]
1024 [Acceleration (AccelMode.SSE41)]
1025 public static unsafe Vector8us PackWithUnsignedSaturation (this Vector4i va, Vector4i vb) {
1026 Vector8us res = new Vector8us ();
1029 ushort *c = (ushort*)&res;
1030 for (int i = 0; i < 4; ++i)
1031 *c++ = (ushort)System.Math.Max (0, System.Math.Min (*a++, ushort.MaxValue));
1032 for (int i = 0; i < 4; ++i)
1033 *c++ = (ushort)System.Math.Max (0, System.Math.Min (*b++, ushort.MaxValue));
1038 [Acceleration (AccelMode.SSE2)]
1039 public static unsafe Vector16b PackWithUnsignedSaturation (this Vector8s va, Vector8s vb) {
1040 Vector16b res = new Vector16b ();
1041 short *a = (short*)&va;
1042 short *b = (short*)&vb;
1043 byte *c = (byte*)&res;
1044 for (int i = 0; i < 8; ++i)
1045 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*a++, byte.MaxValue));
1046 for (int i = 0; i < 8; ++i)
1047 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*b++, byte.MaxValue));
1051 [Acceleration (AccelMode.SSE2)]
1052 public static unsafe Vector8s PackWithSignedSaturation (this Vector4i va, Vector4i vb) {
1053 Vector8s res = new Vector8s ();
1056 short *c = (short*)&res;
1057 for (int i = 0; i < 4; ++i)
1058 *c++ = (short)System.Math.Max (System.Math.Min ((int)*a++, short.MaxValue), short.MinValue);
1059 for (int i = 0; i < 4; ++i)
1060 *c++ = (short)System.Math.Max (System.Math.Min ((int)*b++, short.MaxValue), short.MinValue);
1064 [CLSCompliant(false)]
1065 [Acceleration (AccelMode.SSE2)]
1066 public static unsafe Vector16sb PackWithSignedSaturation (this Vector8s va, Vector8s vb) {
1067 Vector16sb res = new Vector16sb ();
1068 short *a = (short*)&va;
1069 short *b = (short*)&vb;
1070 sbyte *c = (sbyte*)&res;
1071 for (int i = 0; i < 8; ++i)
1072 *c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*a++, sbyte.MaxValue), sbyte.MinValue);
1073 for (int i = 0; i < 8; ++i)
1074 *c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*b++, sbyte.MaxValue), sbyte.MinValue);
1078 /* This function performs a packusdw, which treats the source as a signed value */
1079 [Acceleration (AccelMode.SSE41)]
1080 [CLSCompliant (false)]
1081 public static unsafe Vector8us SignedPackWithUnsignedSaturation (this Vector4ui va, Vector4ui vb) {
1082 Vector8us res = new Vector8us ();
1085 ushort *c = (ushort*)&res;
1086 for (int i = 0; i < 4; ++i)
1087 *c++ = (ushort)System.Math.Max (0, System.Math.Min (*a++, ushort.MaxValue));
1088 for (int i = 0; i < 4; ++i)
1089 *c++ = (ushort)System.Math.Max (0, System.Math.Min (*b++, ushort.MaxValue));
1093 /*This function performs a packuswb, which treats the source as a signed value */
1094 [Acceleration (AccelMode.SSE2)]
1095 [CLSCompliant (false)]
1096 public static unsafe Vector16b SignedPackWithUnsignedSaturation (this Vector8us va, Vector8us vb) {
1097 Vector16b res = new Vector16b ();
1098 short *a = (short*)&va;
1099 short *b = (short*)&vb;
1100 byte *c = (byte*)&res;
1101 for (int i = 0; i < 8; ++i)
1102 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*a++, byte.MaxValue));
1103 for (int i = 0; i < 8; ++i)
1104 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*b++, byte.MaxValue));
1108 /* This function performs a packssdw, which treats the source as a signed value*/
1109 [Acceleration (AccelMode.SSE2)]
1110 [CLSCompliant (false)]
1111 public static unsafe Vector8s SignedPackWithSignedSaturation (this Vector4ui va, Vector4ui vb) {
1112 Vector8s res = new Vector8s ();
1115 short *c = (short*)&res;
1116 for (int i = 0; i < 4; ++i)
1117 *c++ = (short)System.Math.Max (System.Math.Min ((int)*a++, short.MaxValue), short.MinValue);
1118 for (int i = 0; i < 4; ++i)
1119 *c++ = (short)System.Math.Max (System.Math.Min ((int)*b++, short.MaxValue), short.MinValue);
1123 /*This function performs a packsswb, which treats the source as a signed value */
1124 [Acceleration (AccelMode.SSE2)]
1125 [CLSCompliant (false)]
1126 public static unsafe Vector16sb SignedPackWithSignedSaturation (this Vector8us va, Vector8us vb) {
1127 Vector16sb res = new Vector16sb ();
1128 short *a = (short*)&va;
1129 short *b = (short*)&vb;
1130 sbyte *c = (sbyte*)&res;
1131 for (int i = 0; i < 8; ++i)
1132 *c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*a++, sbyte.MaxValue), sbyte.MinValue);
1133 for (int i = 0; i < 8; ++i)
1134 *c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*b++, sbyte.MaxValue), sbyte.MinValue);