2008-11-04 Rodrigo Kumpera <rkumpera@novell.com>
[mono.git] / mcs / class / Mono.Simd / Mono.Simd / Vector8us.cs
1 // Vector8us.cs
2 //
3 // Author:
4 //   Rodrigo Kumpera (rkumpera@novell.com)
5 //
6 // (C) 2008 Novell, Inc. (http://www.novell.com)
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining
9 // a copy of this software and associated documentation files (the
10 // "Software"), to deal in the Software without restriction, including
11 // without limitation the rights to use, copy, modify, merge, publish,
12 // distribute, sublicense, and/or sell copies of the Software, and to
13 // permit persons to whom the Software is furnished to do so, subject to
14 // the following conditions:
15 //
16 // The above copyright notice and this permission notice shall be
17 // included in all copies or substantial portions of the Software.
18 //
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 //
27 using System;
28 using System.Runtime.InteropServices;
29
30 namespace Mono.Simd
31 {
32         [StructLayout(LayoutKind.Sequential, Pack = 0, Size = 16)]
33         [CLSCompliant(false)]
34         public struct Vector8us
35         {
36                 private ushort v0, v1, v2, v3, v4, v5, v6, v7;
37                 public Vector8us (ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
38                 {
39                         this.v0 = v0;
40                         this.v1 = v1;
41                         this.v2 = v2;
42                         this.v3 = v3;
43                         this.v4 = v4;
44                         this.v5 = v5;
45                         this.v6 = v6;
46                         this.v7 = v7;
47                 }
48
49                 public ushort V0 { get { return v0; } set { v0 = value; } }
50                 public ushort V1 { get { return v1; } set { v1 = value; } }
51                 public ushort V2 { get { return v2; } set { v2 = value; } }
52                 public ushort V3 { get { return v3; } set { v3 = value; } }
53                 public ushort V4 { get { return v4; } set { v4 = value; } }
54                 public ushort V5 { get { return v5; } set { v5 = value; } }
55                 public ushort V6 { get { return v6; } set { v6 = value; } }
56                 public ushort V7 { get { return v7; } set { v7 = value; } }
57
58                 [Acceleration (AccelMode.SSE2)]
59                 public static unsafe Vector8us operator + (Vector8us va, Vector8us vb)
60                 {
61                         Vector8us res = new Vector8us ();
62                         ushort *a = &va.v0;
63                         ushort *b = &vb.v0;
64                         ushort *c = &res.v0;
65                         for (int i = 0; i < 8; ++i)
66                                 *c++ = (ushort)(*a++ + *b++);
67                         return res;
68                 }
69
70                 [Acceleration (AccelMode.SSE2)]
71                 public static unsafe Vector8us operator - (Vector8us va, Vector8us vb)
72                 {
73                         Vector8us res = new Vector8us ();
74                         ushort *a = &va.v0;
75                         ushort *b = &vb.v0;
76                         ushort *c = &res.v0;
77                         for (int i = 0; i < 8; ++i)
78                                 *c++ = (ushort)(*a++ - *b++);
79                         return res;
80                 }
81
82                 /*
83                  * NOTE: Thou pmullw states it does signed multiplication, it works for unsigned numbers
84                  * if only the lower part is considered and the flags disregarded.
85                  */
86                 [Acceleration (AccelMode.SSE2)]
87                 public static unsafe Vector8us operator * (Vector8us va, Vector8us vb)
88                 {
89                         Vector8us res = new Vector8us ();
90                         ushort *a = &va.v0;
91                         ushort *b = &vb.v0;
92                         ushort *c = &res.v0;
93                         for (int i = 0; i < 8; ++i)
94                                 *c++ = (ushort)(*a++ * (*b++));
95                         return res;
96                 }
97
98                 [Acceleration (AccelMode.SSE2)]
99                 public static unsafe Vector8us operator >> (Vector8us va, int amount)
100                 {
101                         Vector8us res = new Vector8us ();
102                         ushort *a = &va.v0;
103                         ushort *b = &res.v0;
104                         for (int i = 0; i < 8; ++i)
105                                 *b++ = (ushort)(*a++ >> amount);
106                         return res;
107                 }
108
109                 [Acceleration (AccelMode.SSE2)]
110                 public static unsafe Vector8us operator << (Vector8us va, int amount)
111                 {
112                         Vector8us res = new Vector8us ();
113                         ushort *a = &va.v0;
114                         ushort *b = &res.v0;
115                         for (int i = 0; i < 8; ++i)
116                                 *b++ = (ushort)(*a++ << amount);
117                         return res;
118                 }
119
120                 [Acceleration (AccelMode.SSE2)]
121                 public static unsafe Vector8us operator & (Vector8us va, Vector8us vb)
122                 {
123                         Vector8us res = new Vector8us ();
124                         ushort *a = &va.v0;
125                         ushort *b = &vb.v0;
126                         ushort *c = &res.v0;
127                         for (int i = 0; i < 8; ++i)
128                                 *c++ = (ushort)(*a++ & *b++);
129                         return res;
130                 }
131
132                 [Acceleration (AccelMode.SSE2)]
133                 public static unsafe Vector8us operator | (Vector8us va, Vector8us vb)
134                 {
135                         Vector8us res = new Vector8us ();
136                         ushort *a = &va.v0;
137                         ushort *b = &vb.v0;
138                         ushort *c = &res.v0;
139                         for (int i = 0; i < 8; ++i)
140                                 *c++ = (ushort)(*a++ | *b++);
141                         return res;
142                 }
143
144                 [Acceleration (AccelMode.SSE2)]
145                 public static unsafe Vector8us operator ^ (Vector8us va, Vector8us vb)
146                 {
147                         Vector8us res = new Vector8us ();
148                         ushort *a = &va.v0;
149                         ushort *b = &vb.v0;
150                         ushort *c = &res.v0;
151                         for (int i = 0; i < 8; ++i)
152                                 *c++ = (ushort)(*a++ ^ *b++);
153                         return res;
154                 }
155
156                 [Acceleration (AccelMode.SSE2)]
157                 public static unsafe Vector8us UnpackLow (Vector8us va, Vector8us vb)
158                 {
159                         return new Vector8us (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3);
160                 }
161
162                 [Acceleration (AccelMode.SSE2)]
163                 public static unsafe Vector8us UnpackHigh (Vector8us va, Vector8us vb)
164                 {
165                         return new Vector8us (va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
166                 }
167
168                 [Acceleration (AccelMode.SSE2)]
169                 public static unsafe Vector8us ShiftRightArithmetic (Vector8us va, int amount)
170                 {
171                         Vector8us res = new Vector8us ();
172                         ushort *a = &va.v0;
173                         ushort *b = &res.v0;
174                         for (int i = 0; i < 8; ++i)
175                                 *b++ = (ushort)((short)(*a++) >> amount);
176                         return res;
177                 }
178
179                 [Acceleration (AccelMode.SSE2)]
180                 public static unsafe Vector8us AddWithSaturation (Vector8us va, Vector8us vb) {
181                         Vector8us res = new Vector8us ();
182                         ushort *a = &va.v0;
183                         ushort *b = &vb.v0;
184                         ushort *c = &res.v0;
185                         for (int i = 0; i < 8; ++i)
186                                 *c++ = (ushort) System.Math.Min (*a++ + *b++, ushort.MaxValue);
187                         return res;
188                 }
189
190                 [Acceleration (AccelMode.SSE2)]
191                 public static unsafe Vector8us SubWithSaturation (Vector8us va, Vector8us vb) {
192                         Vector8us res = new Vector8us ();
193                         ushort *a = &va.v0;
194                         ushort *b = &vb.v0;
195                         ushort *c = &res.v0;
196                         for (int i = 0; i < 8; ++i)
197                                 *c++ = (ushort) System.Math.Max (*a++ - *b++, 0);
198                         return res;
199                 }
200
201                 [Acceleration (AccelMode.SSE2)]
202                 public static unsafe Vector8us Average (Vector8us va, Vector8us vb) {
203                         Vector8us res = new Vector8us ();
204                         ushort *a = &va.v0;
205                         ushort *b = &vb.v0;
206                         ushort *c = &res.v0;
207                         for (int i = 0; i < 8; ++i)
208                                 *c++ = (ushort) ((*a++ + *b++ + 1) >> 1);
209                         return res;
210                 }
211
212                 [Acceleration (AccelMode.SSE41)]
213                 public static unsafe Vector8us Max (Vector8us va, Vector8us vb) {
214                         Vector8us res = new Vector8us ();
215                         ushort *a = &va.v0;
216                         ushort *b = &vb.v0;
217                         ushort *c = &res.v0;
218                         for (int i = 0; i < 8; ++i)
219                                 *c++ = (ushort) System.Math.Max (*a++, *b++);
220                         return res;
221                 }
222
223                 [Acceleration (AccelMode.SSE41)]
224                 public static unsafe Vector8us Min (Vector8us va, Vector8us vb) {
225                         Vector8us res = new Vector8us ();
226                         ushort *a = &va.v0;
227                         ushort *b = &vb.v0;
228                         ushort *c = &res.v0;
229                         for (int i = 0; i < 8; ++i)
230                                 *c++ = (ushort) System.Math.Min (*a++, *b++);
231                         return res;
232                 }
233
234                 [Acceleration (AccelMode.SSE2)]
235                 public static unsafe int ExtractByteMask (Vector8us va) {
236                         int res = 0;
237                         byte *a = (byte*)&va;
238                         for (int i = 0; i < 16; ++i)
239                                 res |= (*a++ & 0x80) >> 7 << i;
240                         return res;
241                 }
242
243                 [Acceleration (AccelMode.SSE2)]
244                 public static unsafe Vector8us ShuffleHigh (Vector8us va, ShuffleSel sel)
245                 {
246                         ushort *ptr = ((ushort*)&va) + 4;
247                         int idx = (int)sel;
248                         return new Vector8us (va.v0, va.v1, va.v2, va.v3, *(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)));
249                 }
250
251                 [Acceleration (AccelMode.SSE2)]
252                 public static unsafe Vector8us ShuffleLow (Vector8us va, ShuffleSel sel)
253                 {
254                         ushort *ptr = ((ushort*)&va);
255                         int idx = (int)sel;
256                         return new Vector8us (*(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)), va.v4, va.v5, va.v6, va.v7);
257                 }
258
259                 [Acceleration (AccelMode.SSE2)]
260                 public static unsafe Vector8us CompareEqual (Vector8us va, Vector8us vb) {
261                         Vector8us res = new Vector8us ();
262                         ushort *a = &va.v0;
263                         ushort *b = &vb.v0;
264                         ushort *c = &res.v0;
265                         for (int i = 0; i < 8; ++i)
266                                 *c++ = (ushort) (*a++ == *b++ ? -1 : 0);
267                         return res;
268                 }
269
270                 [Acceleration (AccelMode.SSE2)]
271                 public static unsafe Vector8us MultiplyStoreHigh (Vector8us va, Vector8us vb) {
272                         Vector8us res = new Vector8us ();
273                         ushort *a = &va.v0;
274                         ushort *b = &vb.v0;
275                         ushort *c = &res.v0;
276                         for (int i = 0; i < 8; ++i)
277                                 *c++ = (ushort)((uint)*a++ * (uint)*b++ >> 16);
278                         return res;
279                 }
280
281                 /*This function performs a packuswb, which treats the source as a signed value */
282                 [Acceleration (AccelMode.SSE2)]
283                 public static unsafe Vector16b SignedPackWithUnsignedSaturation (Vector8us va, Vector8us vb) {
284                         Vector16b res = new Vector16b ();
285                         short *a = (short*)&va;
286                         short *b = (short*)&vb;
287                         byte *c = (byte*)&res;
288                         for (int i = 0; i < 8; ++i)
289                                 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*a++, byte.MaxValue));
290                         for (int i = 0; i < 8; ++i)
291                                 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*b++, byte.MaxValue));
292                         return res;
293                 }
294
295                 /*This function performs a packsswb, which treats the source as a signed value */
296                 [Acceleration (AccelMode.SSE2)]
297                 public static unsafe Vector16sb SignedPackWithSignedSaturation (Vector8us va, Vector8us vb) {
298                         Vector16sb res = new Vector16sb ();
299                         short *a = (short*)&va;
300                         short *b = (short*)&vb;
301                         sbyte *c = (sbyte*)&res;
302                         for (int i = 0; i < 8; ++i)
303                                 *c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*a++, sbyte.MaxValue), sbyte.MinValue);
304                         for (int i = 0; i < 8; ++i)
305                                 *c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*b++, sbyte.MaxValue), sbyte.MinValue);
306                         return res;
307                 }
308
309                 [Acceleration (AccelMode.SSE1)]
310                 public static unsafe explicit operator Vector2d (Vector8us v)
311                 {
312                         Vector2d* p = (Vector2d*)&v;
313                         return *p;
314                 }
315
316                 [Acceleration (AccelMode.SSE1)]
317                 public static unsafe explicit operator Vector4f (Vector8us v)
318                 {
319                         Vector4f* p = (Vector4f*)&v;
320                         return *p;
321                 }
322
323                 [Acceleration (AccelMode.SSE1)]
324                 public static unsafe explicit operator Vector2l (Vector8us v)
325                 {
326                         Vector2l* p = (Vector2l*)&v;
327                         return *p;
328                 }
329
330                 [Acceleration (AccelMode.SSE1)]
331                 public static unsafe explicit operator Vector2ul (Vector8us v)
332                 {
333                         Vector2ul* p = (Vector2ul*)&v;
334                         return *p;
335                 }
336
337                 [Acceleration (AccelMode.SSE1)]
338                 public static unsafe explicit operator Vector4i (Vector8us v)
339                 {
340                         Vector4i* p = (Vector4i*)&v;
341                         return *p;
342                 }
343
344                 [Acceleration (AccelMode.SSE1)]
345                 public static unsafe explicit operator Vector4ui (Vector8us v)
346                 {
347                         Vector4ui* p = (Vector4ui*)&v;
348                         return *p;
349                 }
350
351                 [Acceleration (AccelMode.SSE1)]
352                 public static unsafe explicit operator Vector8s (Vector8us v)
353                 {
354                         Vector8s* p = (Vector8s*)&v;
355                         return *p;
356                 }
357
358                 [Acceleration (AccelMode.SSE1)]
359                 public static unsafe explicit operator Vector16sb (Vector8us v)
360                 {
361                         Vector16sb* p = (Vector16sb*)&v;
362                         return *p;
363                 }
364
365                 [Acceleration (AccelMode.SSE1)]
366                 public static unsafe explicit operator Vector16b (Vector8us v)
367                 {
368                         Vector16b* p = (Vector16b*)&v;
369                         return *p;
370                 }
371
372
373                 [Acceleration (AccelMode.SSE1)]
374                 public static Vector8us LoadAligned (ref Vector8us v)
375                 {
376                         return v;
377                 }
378
379                 [Acceleration (AccelMode.SSE1)]
380                 public static void StoreAligned (ref Vector8us res, Vector8us val)
381                 {
382                         res = val;
383                 }
384
385                 [Acceleration (AccelMode.SSE1)]
386                 public static unsafe Vector8us LoadAligned (Vector8us *v)
387                 {
388                         return *v;
389                 }
390
391                 [Acceleration (AccelMode.SSE1)]
392                 public static unsafe void StoreAligned (Vector8us *res, Vector8us val)
393                 {
394                         *res = val;
395                 }
396
397                 [Acceleration (AccelMode.SSE1)]
398                 [CLSCompliant(false)]
399                 public static void PrefetchTemporalAllCacheLevels (ref Vector8us res)
400                 {
401                 }
402
403                 [Acceleration (AccelMode.SSE1)]
404                 [CLSCompliant(false)]
405                 public static void PrefetchTemporal1stLevelCache (ref Vector8us res)
406                 {
407                 }
408
409                 [Acceleration (AccelMode.SSE1)]
410                 [CLSCompliant(false)]
411                 public static void PrefetchTemporal2ndLevelCache (ref Vector8us res)
412                 {
413                 }
414
415                 [Acceleration (AccelMode.SSE1)]
416                 [CLSCompliant(false)]
417                 public static void PrefetchNonTemporal (ref Vector8us res)
418                 {
419                 }
420
421                 [Acceleration (AccelMode.SSE1)]
422                 [CLSCompliant(false)]
423                 public static unsafe void PrefetchTemporalAllCacheLevels (Vector8us *res)
424                 {
425                 }
426
427                 [Acceleration (AccelMode.SSE1)]
428                 [CLSCompliant(false)]
429                 public static unsafe void PrefetchTemporal1stLevelCache (Vector8us *res)
430                 {
431                 }
432
433                 [Acceleration (AccelMode.SSE1)]
434                 [CLSCompliant(false)]
435                 public static unsafe void PrefetchTemporal2ndLevelCache (Vector8us *res)
436                 {
437                 }
438
439                 [Acceleration (AccelMode.SSE1)]
440                 [CLSCompliant(false)]
441                 public static unsafe void PrefetchNonTemporal (Vector8us *res)
442                 {
443                 }
444         }
445 }