2 * simd-intrinsics.c: simd support for intrinsics
5 * Rodrigo Kumpera (rkumpera@novell.com)
7 * (C) 2008 Novell, Inc.
15 #include "mono/utils/bsearch.h"
16 #include <mono/metadata/abi-details.h>
19 General notes on SIMD intrinsics
21 TODO handle operands with non SIMD args, such as op_Addition (Vector4f, float)
22 TODO optimize r4const in .ctor so it doesn't go into the FP stack first
23 TODO extend op_to_op_dest_membase to handle simd ops
24 TODO add support for indexed versions of simd ops
25 TODO do an amd64 port and figure out how to properly handle extractors/.ctor
26 TODO make sure locals, arguments and spills are properly aligned.
27 TODO add support for fusing a XMOVE into a simd op in mono_spill_global_vars.
28 TODO add stuff to man pages
29 TODO document this under /docs
30 TODO make passing a xmm as argument not cause it to be LDADDR'ed (introduce an OP_XPUSH)
31 TODO revamp the .ctor sequence as it looks very fragile, maybe use a var just like move_i4_to_f. (or just pinst sse ops)
32 TODO figure out what's wrong with OP_STOREX_MEMBASE_REG and OP_STOREX_MEMBASE (the 2nd is for imm operands)
33 TODO maybe add SSE3 emulation on top of SSE2, or just implement the corresponding functions using SSE2 intrinsics.
34 TODO pass simd arguments in registers or, at least, add SSE support for pushing large (>=16) valuetypes
35 TODO passing simd args byval to a non-intrinsic method causes some useless local var load/store to happen.
36 TODO check if we need to init the SSE control word with better precision.
37 TODO add support for 3 reg sources in mini without slowing the common path. Or find a way to make MASKMOVDQU work.
38 TODO make SimdRuntime.get_AccelMode work under AOT
39 TODO patterns such as "a ^= b" generate slower code as the LDADDR op will be copied to a tmp first. Look at adding an indirection reduction pass after the dce pass.
40 TODO extend bounds checking code to support range checking.
42 General notes for SIMD intrinsics.
44 -Bad extractor and constructor performance
45 Extracting a float from a XMM is a complete disaster if you are passing it as an argument.
46 It will be loaded in the FP stack just to be pushed on the call stack.
48 A similar thing happens with Vector4f constructor that require float vars to be
50 The fix for this issue is similar to the one required for r4const as method args. Avoiding the
51 trip to the FP stack is desirable.
53 -Extractor and constructor code doesn't make sense under amd64. Both currently assume separate banks
57 -Promote OP_EXTRACT_I4 to a STORE op
58 The advantage of this change is that it could have a _membase version and promote further optimizations.
60 -Create a MONO_INST_DONT_REGALLOC and use it in all places that MONO_INST_INDIRECT is used
64 #if defined (MONO_ARCH_SIMD_INTRINSICS)
66 #if defined (DISABLE_JIT)
69 mono_simd_intrinsics_init (void)
/* Alternate definition kept for reference: uncommenting it (and removing the one below) turns debug output off. */
75 //#define IS_DEBUG_ON(cfg) (0)
/* Debug output is on when cfg->verbose_level is 3 or higher. */
77 #define IS_DEBUG_ON(cfg) ((cfg)->verbose_level >= 3)
/*
 * Executes statement(s) `a` only when IS_DEBUG_ON is true.
 * The expansion refers to an identifier named `cfg`, so one must be in
 * scope wherever DEBUG is used. Wrapped in do/while(0) so it behaves as a
 * single statement.
 */
78 #define DEBUG(a) do { if (IS_DEBUG_ON(cfg)) { a; } } while (0)
84 SIMD_EMIT_GETTER_QWORD,
90 SIMD_EMIT_LOAD_ALIGNED,
92 SIMD_EMIT_EXTRACT_MASK,
96 #ifdef HAVE_ARRAY_ELEM_INIT
97 #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
98 #define MSGSTRFIELD1(line) str##line
99 static const struct msgstr_t {
100 #define SIMD_METHOD(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
101 #include "simd-methods.h"
104 #define SIMD_METHOD(str,name) str,
105 #include "simd-methods.h"
110 #define SIMD_METHOD(str,name) name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
111 #include "simd-methods.h"
113 #define method_name(idx) ((const char*)&method_names + (idx))
116 #define SIMD_METHOD(str,name) str,
117 static const char * const method_names [] = {
118 #include "simd-methods.h"
122 #define SIMD_METHOD(str,name) name,
124 #include "simd-methods.h"
128 #define method_name(idx) (method_names [(idx)])
135 guint8 simd_version_flags;
136 guint8 simd_emit_mode : 4;
/*
 * Intrinsic table for vector4f.
 *
 * Row layout used throughout this table:
 *   { SN_<method>, OP_* opcode or small integer, SIMD_VERSION_* flags,
 *     SIMD_EMIT_* mode [, extra value] }
 * The optional fifth value is a SIMD_COMP_* constant on the OP_COMPPS rows
 * and a SIMD_PREFETCH_MODE_* constant on the prefetch rows.
 * Getter/setter rows carry 0..3 in the second column instead of an opcode
 * (presumably the element index -- confirm against the emit code).
 *
 * NOTE(review): the rows look name-ordered and mono/utils/bsearch.h is
 * included by this file, so the table is presumably binary-searched;
 * confirm before reordering or inserting rows.
 */
140 static const SimdIntrinsic vector4f_intrinsics[] = {
141 { SN_ctor, OP_EXPAND_R4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
142 { SN_AddSub, OP_ADDSUBPS, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY},
143 { SN_AndNot, OP_ANDNPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY},
144 { SN_CompareEqual, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_EQ },
145 { SN_CompareLessEqual, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_LE },
146 { SN_CompareLessThan, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_LT },
147 { SN_CompareNotEqual, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NEQ },
148 { SN_CompareNotLessEqual, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLE },
149 { SN_CompareNotLessThan, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLT },
150 { SN_CompareOrdered, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_ORD },
151 { SN_CompareUnordered, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_UNORD },
152 { SN_ConvertToDouble, OP_CVTPS2PD, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
153 { SN_ConvertToInt, OP_CVTPS2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
154 { SN_ConvertToIntTruncated, OP_CVTTPS2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
155 { SN_DuplicateHigh, OP_DUPPS_HIGH, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY },
156 { SN_DuplicateLow, OP_DUPPS_LOW, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY },
157 { SN_HorizontalAdd, OP_HADDPS, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
158 { SN_HorizontalSub, OP_HSUBPS, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
159 { SN_InterleaveHigh, OP_UNPACK_HIGHPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
160 { SN_InterleaveLow, OP_UNPACK_LOWPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
161 { SN_InvSqrt, OP_RSQRTPS, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY },
162 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
163 { SN_Max, OP_MAXPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
164 { SN_Min, OP_MINPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
165 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
166 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
167 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
168 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
169 { SN_Reciprocal, OP_RCPPS, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY },
170 { SN_Shuffle, OP_PSHUFLED, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
171 { SN_Sqrt, OP_SQRTPS, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY },
172 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
173 { SN_StoreNonTemporal, OP_STOREX_NTA_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
174 { SN_get_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
175 { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
176 { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
177 { SN_get_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
178 { SN_op_Addition, OP_ADDPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
179 { SN_op_BitwiseAnd, OP_ANDPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
180 { SN_op_BitwiseOr, OP_ORPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
181 { SN_op_Division, OP_DIVPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
182 { SN_op_Equality, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
183 { SN_op_ExclusiveOr, OP_XORPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
184 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
185 { SN_op_Inequality, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
186 { SN_op_Multiply, OP_MULPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
187 { SN_op_Subtraction, OP_SUBPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
188 { SN_set_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
189 { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
190 { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
191 { SN_set_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER }
/*
 * Intrinsic table for vector2d.
 *
 * Row layout used throughout this table:
 *   { SN_<method>, OP_* opcode or small integer, SIMD_VERSION_* flags,
 *     SIMD_EMIT_* mode [, extra value] }
 * The optional fifth value is a SIMD_COMP_* constant on the OP_COMPPD rows
 * and a SIMD_PREFETCH_MODE_* constant on the prefetch rows.
 * get_X/get_Y use SIMD_EMIT_GETTER_QWORD (not SIMD_EMIT_GETTER) and, like
 * the setters, carry 0 or 1 in the second column (presumably the element
 * index -- confirm against the emit code).
 *
 * NOTE(review): the rows look name-ordered and mono/utils/bsearch.h is
 * included by this file, so the table is presumably binary-searched;
 * confirm before reordering or inserting rows.
 */
194 static const SimdIntrinsic vector2d_intrinsics[] = {
195 { SN_ctor, OP_EXPAND_R8, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
196 { SN_AddSub, OP_ADDSUBPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY,},
197 { SN_AndNot, OP_ANDNPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
198 { SN_CompareEqual, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_EQ },
199 { SN_CompareLessEqual, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_LE },
200 { SN_CompareLessThan, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_LT },
201 { SN_CompareNotEqual, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NEQ },
202 { SN_CompareNotLessEqual, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLE },
203 { SN_CompareNotLessThan, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_NLT },
204 { SN_CompareOrdered, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_ORD },
205 { SN_CompareUnordered, OP_COMPPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY, SIMD_COMP_UNORD },
206 { SN_ConvertToFloat, OP_CVTPD2PS, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
207 { SN_ConvertToInt, OP_CVTPD2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
208 { SN_ConvertToIntTruncated, OP_CVTTPD2DQ, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
209 { SN_Duplicate, OP_DUPPD, SIMD_VERSION_SSE3, SIMD_EMIT_UNARY },
210 { SN_HorizontalAdd, OP_HADDPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
211 { SN_HorizontalSub, OP_HSUBPD, SIMD_VERSION_SSE3, SIMD_EMIT_BINARY },
212 { SN_InterleaveHigh, OP_UNPACK_HIGHPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
213 { SN_InterleaveLow, OP_UNPACK_LOWPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
214 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
215 { SN_Max, OP_MAXPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
216 { SN_Min, OP_MINPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
217 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
218 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
219 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
220 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
221 { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
222 { SN_Sqrt, OP_SQRTPD, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY },
223 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
224 { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
225 { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
226 { SN_op_Addition, OP_ADDPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
227 { SN_op_BitwiseAnd, OP_ANDPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
228 { SN_op_BitwiseOr, OP_ORPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
229 { SN_op_Division, OP_DIVPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
230 { SN_op_ExclusiveOr, OP_XORPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
231 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
232 { SN_op_Multiply, OP_MULPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
233 { SN_op_Subtraction, OP_SUBPD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
234 { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
235 { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
/*
 * Intrinsic table for vector2ul.
 *
 * Row layout used throughout this table:
 *   { SN_<method>, OP_* opcode or small integer, SIMD_VERSION_* flags,
 *     SIMD_EMIT_* mode [, extra value] }
 * The SIMD_VERSION_* value must come before the SIMD_EMIT_* value: the
 * struct declares simd_version_flags ahead of simd_emit_mode.
 * get_X/get_Y use SIMD_EMIT_GETTER_QWORD and, like the setters, carry 0 or 1
 * in the second column (presumably the element index -- confirm against the
 * emit code).
 *
 * NOTE(review): the rows look name-ordered and mono/utils/bsearch.h is
 * included by this file, so the table is presumably binary-searched;
 * confirm before reordering or inserting rows.
 */
238 static const SimdIntrinsic vector2ul_intrinsics[] = {
239 { SN_ctor, OP_EXPAND_I8, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
240 { SN_CompareEqual, OP_PCMPEQQ, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
241 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
242 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
243 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
244 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
245 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
246 { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
247 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
248 { SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
249 { SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
250 { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
251 { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
252 { SN_op_Addition, OP_PADDQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
253 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
254 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
/* Version flags first, emit mode second -- these two were previously swapped on this row. */
255 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
256 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
257 { SN_op_LeftShift, OP_PSHLQ, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
258 { SN_op_Multiply, OP_PMULQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
259 { SN_op_RightShift, OP_PSHRQ, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
260 { SN_op_Subtraction, OP_PSUBQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
261 { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
262 { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
/*
 * Intrinsic table for vector2l.
 *
 * Row layout used throughout this table:
 *   { SN_<method>, OP_* opcode or small integer, SIMD_VERSION_* flags,
 *     SIMD_EMIT_* mode [, extra value] }
 * The optional fifth value is a SIMD_PREFETCH_MODE_* constant on the
 * prefetch rows. get_X/get_Y use SIMD_EMIT_GETTER_QWORD and, like the
 * setters, carry 0 or 1 in the second column (presumably the element
 * index -- confirm against the emit code).
 * This table has a LogicalRightShift row (OP_PSHRQ) but no op_RightShift row.
 *
 * NOTE(review): the rows look name-ordered and mono/utils/bsearch.h is
 * included by this file, so the table is presumably binary-searched;
 * confirm before reordering or inserting rows.
 */
265 static const SimdIntrinsic vector2l_intrinsics[] = {
266 { SN_ctor, OP_EXPAND_I8, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
267 { SN_CompareEqual, OP_PCMPEQQ, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
268 { SN_CompareGreaterThan, OP_PCMPGTQ, SIMD_VERSION_SSE42, SIMD_EMIT_BINARY },
269 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
270 { SN_LogicalRightShift, OP_PSHRQ, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
271 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
272 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
273 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
274 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
275 { SN_Shuffle, OP_SHUFPD, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
276 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
277 { SN_UnpackHigh, OP_UNPACK_HIGHQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
278 { SN_UnpackLow, OP_UNPACK_LOWQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
279 { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
280 { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER_QWORD },
281 { SN_op_Addition, OP_PADDQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
282 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
283 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
284 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
285 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
286 { SN_op_LeftShift, OP_PSHLQ, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
287 { SN_op_Multiply, OP_PMULQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
288 { SN_op_Subtraction, OP_PSUBQ, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
289 { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
290 { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
/*
 * Intrinsic table for vector4ui.
 *
 * Row layout used throughout this table:
 *   { SN_<method>, OP_* opcode or small integer, SIMD_VERSION_* flags,
 *     SIMD_EMIT_* mode [, extra value] }
 * The optional fifth value is a SIMD_PREFETCH_MODE_* constant on the
 * prefetch rows and SIMD_COMP_EQ / SIMD_COMP_NEQ on the op_Equality /
 * op_Inequality rows (both of which use OP_PCMPEQD).
 * Getter/setter rows carry 0..3 in the second column instead of an opcode
 * (presumably the element index -- confirm against the emit code).
 * Here op_RightShift maps to OP_PSHRD and ArithmeticRightShift to OP_PSARD.
 *
 * NOTE(review): the rows look name-ordered and mono/utils/bsearch.h is
 * included by this file, so the table is presumably binary-searched;
 * confirm before reordering or inserting rows.
 */
293 static const SimdIntrinsic vector4ui_intrinsics[] = {
294 { SN_ctor, OP_EXPAND_I4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
295 { SN_ArithmeticRightShift, OP_PSARD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
296 { SN_CompareEqual, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
297 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
298 { SN_Max, OP_PMAXD_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
299 { SN_Min, OP_PMIND_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
300 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
301 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
302 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
303 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
304 { SN_Shuffle, OP_PSHUFLED, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
305 { SN_SignedPackWithSignedSaturation, OP_PACKD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
306 { SN_SignedPackWithUnsignedSaturation, OP_PACKD_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
307 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
308 { SN_UnpackHigh, OP_UNPACK_HIGHD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
309 { SN_UnpackLow, OP_UNPACK_LOWD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
310 { SN_get_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
311 { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
312 { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
313 { SN_get_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
314 { SN_op_Addition, OP_PADDD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
315 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
316 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
317 { SN_op_Equality, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
318 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
319 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
320 { SN_op_Inequality, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
321 { SN_op_LeftShift, OP_PSHLD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
322 { SN_op_Multiply, OP_PMULD, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
323 { SN_op_RightShift, OP_PSHRD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
324 { SN_op_Subtraction, OP_PSUBD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
325 { SN_set_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
326 { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
327 { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
328 { SN_set_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
/*
 * Intrinsic table for vector4i.
 *
 * Row layout used throughout this table:
 *   { SN_<method>, OP_* opcode or small integer, SIMD_VERSION_* flags,
 *     SIMD_EMIT_* mode [, extra value] }
 * The optional fifth value is a SIMD_PREFETCH_MODE_* constant on the
 * prefetch rows and SIMD_COMP_EQ / SIMD_COMP_NEQ on the op_Equality /
 * op_Inequality rows (both of which use OP_PCMPEQD).
 * Getter/setter rows carry 0..3 in the second column instead of an opcode
 * (presumably the element index -- confirm against the emit code).
 * Here op_RightShift maps to OP_PSARD and LogicalRightShift to OP_PSHRD,
 * the reverse of the pairing in vector4ui_intrinsics.
 *
 * NOTE(review): the rows look name-ordered and mono/utils/bsearch.h is
 * included by this file, so the table is presumably binary-searched;
 * confirm before reordering or inserting rows.
 */
331 static const SimdIntrinsic vector4i_intrinsics[] = {
332 { SN_ctor, OP_EXPAND_I4, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
333 { SN_CompareEqual, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
334 { SN_CompareGreaterThan, OP_PCMPGTD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
335 { SN_ConvertToDouble, OP_CVTDQ2PD, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
336 { SN_ConvertToFloat, OP_CVTDQ2PS, SIMD_VERSION_SSE2, SIMD_EMIT_UNARY },
337 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
338 { SN_LogicalRightShift, OP_PSHRD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
339 { SN_Max, OP_PMAXD, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
340 { SN_Min, OP_PMIND, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
341 { SN_PackWithSignedSaturation, OP_PACKD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
342 { SN_PackWithUnsignedSaturation, OP_PACKD_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
343 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
344 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
345 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
346 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
347 { SN_Shuffle, OP_PSHUFLED, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
348 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
349 { SN_UnpackHigh, OP_UNPACK_HIGHD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
350 { SN_UnpackLow, OP_UNPACK_LOWD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
351 { SN_get_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
352 { SN_get_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
353 { SN_get_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
354 { SN_get_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
355 { SN_op_Addition, OP_PADDD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
356 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
357 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
358 { SN_op_Equality, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
359 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
360 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
361 { SN_op_Inequality, OP_PCMPEQD, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
362 { SN_op_LeftShift, OP_PSHLD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
363 { SN_op_Multiply, OP_PMULD, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
364 { SN_op_RightShift, OP_PSARD, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
365 { SN_op_Subtraction, OP_PSUBD, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
366 { SN_set_W, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
367 { SN_set_X, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
368 { SN_set_Y, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
369 { SN_set_Z, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
/*
 * Intrinsic table for vector8us.
 *
 * Row layout used throughout this table:
 *   { SN_<method>, OP_* opcode or small integer, SIMD_VERSION_* flags,
 *     SIMD_EMIT_* mode [, extra value] }
 * The optional fifth value is a SIMD_PREFETCH_MODE_* constant on the
 * prefetch rows and SIMD_COMP_EQ / SIMD_COMP_NEQ on the op_Equality /
 * op_Inequality rows; other rows leave it out (zero-initialized).
 * Getter/setter rows carry 0..7 in the second column instead of an opcode
 * (presumably the element index -- confirm against the emit code).
 *
 * NOTE(review): the rows look name-ordered and mono/utils/bsearch.h is
 * included by this file, so the table is presumably binary-searched;
 * confirm before reordering or inserting rows.
 */
372 static const SimdIntrinsic vector8us_intrinsics[] = {
373 { SN_ctor, OP_EXPAND_I2, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
374 { SN_AddWithSaturation, OP_PADDW_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
375 { SN_ArithmeticRightShift, OP_PSARW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
376 { SN_Average, OP_PAVGW_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
/* No fifth value here: a SIMD_VERSION_* constant used to sit in that slot, unlike every other integer CompareEqual row. */
377 { SN_CompareEqual, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
378 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
379 { SN_Max, OP_PMAXW_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
380 { SN_Min, OP_PMINW_UN, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
381 { SN_MultiplyStoreHigh, OP_PMULW_HIGH_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
382 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
383 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
384 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
385 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
386 { SN_ShuffleHigh, OP_PSHUFLEW_HIGH, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
387 { SN_ShuffleLow, OP_PSHUFLEW_LOW, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
388 { SN_SignedPackWithSignedSaturation, OP_PACKW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
389 { SN_SignedPackWithUnsignedSaturation, OP_PACKW_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
390 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
391 { SN_SubtractWithSaturation, OP_PSUBW_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
392 { SN_UnpackHigh, OP_UNPACK_HIGHW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
393 { SN_UnpackLow, OP_UNPACK_LOWW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
394 { SN_get_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
395 { SN_get_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
396 { SN_get_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
397 { SN_get_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
398 { SN_get_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
399 { SN_get_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
400 { SN_get_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
401 { SN_get_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
402 { SN_op_Addition, OP_PADDW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
403 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
404 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
405 { SN_op_Equality, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
406 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
407 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
408 { SN_op_Inequality, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
409 { SN_op_LeftShift, OP_PSHLW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
410 { SN_op_Multiply, OP_PMULW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
411 { SN_op_RightShift, OP_PSHRW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
412 { SN_op_Subtraction, OP_PSUBW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
413 { SN_set_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
414 { SN_set_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
415 { SN_set_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
416 { SN_set_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
417 { SN_set_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
418 { SN_set_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
419 { SN_set_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
420 { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
/*
 * Intrinsic table for vector8s.
 *
 * Row layout used throughout this table:
 *   { SN_<method>, OP_* opcode or small integer, SIMD_VERSION_* flags,
 *     SIMD_EMIT_* mode [, extra value] }
 * The optional fifth value is a SIMD_PREFETCH_MODE_* constant on the
 * prefetch rows and SIMD_COMP_EQ / SIMD_COMP_NEQ on the op_Equality /
 * op_Inequality rows.
 * Getter/setter rows carry 0..7 in the second column instead of an opcode
 * (presumably the element index -- confirm against the emit code).
 * This table uses the opcodes without the _UN suffix for its arithmetic
 * (OP_PADDW_SAT, OP_PMAXW, OP_PMINW, OP_PMULW_HIGH), where
 * vector8us_intrinsics uses the _UN variants.
 *
 * NOTE(review): the rows look name-ordered and mono/utils/bsearch.h is
 * included by this file, so the table is presumably binary-searched;
 * confirm before reordering or inserting rows.
 */
423 static const SimdIntrinsic vector8s_intrinsics[] = {
424 { SN_ctor, OP_EXPAND_I2, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
425 { SN_AddWithSaturation, OP_PADDW_SAT, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
426 { SN_CompareEqual, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
427 { SN_CompareGreaterThan, OP_PCMPGTW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
428 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
429 { SN_LogicalRightShift, OP_PSHRW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
430 { SN_Max, OP_PMAXW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
431 { SN_Min, OP_PMINW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
432 { SN_MultiplyStoreHigh, OP_PMULW_HIGH, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
433 { SN_PackWithSignedSaturation, OP_PACKW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
434 { SN_PackWithUnsignedSaturation, OP_PACKW_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
435 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
436 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
437 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
438 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
439 { SN_ShuffleHigh, OP_PSHUFLEW_HIGH, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
440 { SN_ShuffleLow, OP_PSHUFLEW_LOW, SIMD_VERSION_SSE1, SIMD_EMIT_SHUFFLE },
441 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
/* Signed saturating subtract, matching OP_PADDW_SAT above; this row previously used the unsigned OP_PSUBW_SAT_UN. */
442 { SN_SubtractWithSaturation, OP_PSUBW_SAT, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
443 { SN_UnpackHigh, OP_UNPACK_HIGHW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
444 { SN_UnpackLow, OP_UNPACK_LOWW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
445 { SN_get_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
446 { SN_get_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
447 { SN_get_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
448 { SN_get_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
449 { SN_get_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
450 { SN_get_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
451 { SN_get_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
452 { SN_get_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
453 { SN_op_Addition, OP_PADDW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
454 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
455 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
456 { SN_op_Equality, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
457 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
458 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
459 { SN_op_Inequality, OP_PCMPEQW, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
460 { SN_op_LeftShift, OP_PSHLW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
461 { SN_op_Multiply, OP_PMULW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
462 { SN_op_RightShift, OP_PSARW, SIMD_VERSION_SSE1, SIMD_EMIT_SHIFT },
463 { SN_op_Subtraction, OP_PSUBW, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
464 { SN_set_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
465 { SN_set_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
466 { SN_set_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
467 { SN_set_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
468 { SN_set_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
469 { SN_set_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
470 { SN_set_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
471 { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
/*
 * Intrinsic table for vector16b.
 *
 * Row layout used throughout this table:
 *   { SN_<method>, OP_* opcode or small integer, SIMD_VERSION_* flags,
 *     SIMD_EMIT_* mode [, extra value] }
 * The optional fifth value is a SIMD_PREFETCH_MODE_* constant on the
 * prefetch rows and SIMD_COMP_EQ / SIMD_COMP_NEQ on the op_Equality /
 * op_Inequality rows.
 * Getter/setter rows carry 0..15 in the second column instead of an opcode
 * (presumably the element index -- confirm against the emit code).
 * The V<n> rows are not in numeric order: V10..V15 sit between V1 and V2,
 * which is where they fall when the method names are compared as strings.
 *
 * NOTE(review): mono/utils/bsearch.h is included by this file, so the
 * table is presumably binary-searched by name; confirm before reordering
 * or inserting rows.
 */
474 static const SimdIntrinsic vector16b_intrinsics[] = {
475 { SN_ctor, OP_EXPAND_I1, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
476 { SN_AddWithSaturation, OP_PADDB_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
477 { SN_Average, OP_PAVGB_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
478 { SN_CompareEqual, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
479 { SN_ExtractByteMask, 0, SIMD_VERSION_SSE1, SIMD_EMIT_EXTRACT_MASK },
480 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
481 { SN_Max, OP_PMAXB_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
482 { SN_Min, OP_PMINB_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
483 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
484 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
485 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
486 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
487 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
488 { SN_SubtractWithSaturation, OP_PSUBB_SAT_UN, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
489 { SN_SumOfAbsoluteDifferences, OP_PSUM_ABS_DIFF, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
490 { SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
491 { SN_UnpackLow, OP_UNPACK_LOWB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
492 { SN_get_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
493 { SN_get_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
494 { SN_get_V10, 10, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
495 { SN_get_V11, 11, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
496 { SN_get_V12, 12, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
497 { SN_get_V13, 13, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
498 { SN_get_V14, 14, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
499 { SN_get_V15, 15, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
500 { SN_get_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
501 { SN_get_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
502 { SN_get_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
503 { SN_get_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
504 { SN_get_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
505 { SN_get_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
506 { SN_get_V8, 8, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
507 { SN_get_V9, 9, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
508 { SN_op_Addition, OP_PADDB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
509 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
510 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
511 { SN_op_Equality, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
512 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
513 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
514 { SN_op_Inequality, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
515 { SN_op_Subtraction, OP_PSUBB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
516 { SN_set_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
517 { SN_set_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
518 { SN_set_V10, 10, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
519 { SN_set_V11, 11, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
520 { SN_set_V12, 12, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
521 { SN_set_V13, 13, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
522 { SN_set_V14, 14, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
523 { SN_set_V15, 15, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
524 { SN_set_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
525 { SN_set_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
526 { SN_set_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
527 { SN_set_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
528 { SN_set_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
529 { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
530 { SN_set_V8, 8, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
531 { SN_set_V9, 9, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
/*
 * Intrinsics table for the vector16sb SIMD type.
 * Judging by how emit_intrinsics () and the emitters read SimdIntrinsic, each
 * row is: method name id, opcode (element index for getters/setters), required
 * SIMD version flags, emit mode and optional flags.
 * emit_intrinsics () finds rows with mono_binary_search () keyed on the method
 * name, so rows presumably have to stay sorted by name.
 * NOTE(review): the Prefetch* rows do not look strcmp-ordered by identifier;
 * verify against the actual method-name strings before relying on the lookup.
 */
538 static const SimdIntrinsic vector16sb_intrinsics[] = {
539 { SN_ctor, OP_EXPAND_I1, SIMD_VERSION_SSE1, SIMD_EMIT_CTOR },
540 { SN_AddWithSaturation, OP_PADDB_SAT, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
541 { SN_CompareEqual, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
542 { SN_CompareGreaterThan, OP_PCMPGTB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
543 { SN_ExtractByteMask, 0, SIMD_VERSION_SSE1, SIMD_EMIT_EXTRACT_MASK },
544 { SN_LoadAligned, 0, SIMD_VERSION_SSE1, SIMD_EMIT_LOAD_ALIGNED },
545 { SN_Max, OP_PMAXB, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
546 { SN_Min, OP_PMINB, SIMD_VERSION_SSE41, SIMD_EMIT_BINARY },
547 { SN_PrefetchTemporalAllCacheLevels, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_0 },
548 { SN_PrefetchTemporal1stLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_1 },
549 { SN_PrefetchTemporal2ndLevelCache, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_2 },
550 { SN_PrefetchNonTemporal, 0, SIMD_VERSION_SSE1, SIMD_EMIT_PREFETCH, SIMD_PREFETCH_MODE_NTA },
551 { SN_StoreAligned, OP_STOREX_ALIGNED_MEMBASE_REG, SIMD_VERSION_SSE1, SIMD_EMIT_STORE },
552 { SN_SubtractWithSaturation, OP_PSUBB_SAT, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
553 { SN_UnpackHigh, OP_UNPACK_HIGHB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
554 { SN_UnpackLow, OP_UNPACK_LOWB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
555 { SN_get_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
556 { SN_get_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
557 { SN_get_V10, 10, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
558 { SN_get_V11, 11, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
559 { SN_get_V12, 12, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
560 { SN_get_V13, 13, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
561 { SN_get_V14, 14, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
562 { SN_get_V15, 15, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
563 { SN_get_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
564 { SN_get_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
565 { SN_get_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
566 { SN_get_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
567 { SN_get_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
568 { SN_get_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
569 { SN_get_V8, 8, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
570 { SN_get_V9, 9, SIMD_VERSION_SSE1, SIMD_EMIT_GETTER },
571 { SN_op_Addition, OP_PADDB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
572 { SN_op_BitwiseAnd, OP_PAND, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
573 { SN_op_BitwiseOr, OP_POR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
574 { SN_op_Equality, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
575 { SN_op_ExclusiveOr, OP_PXOR, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
576 { SN_op_Explicit, 0, SIMD_VERSION_SSE1, SIMD_EMIT_CAST },
577 { SN_op_Inequality, OP_PCMPEQB, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_NEQ },
578 { SN_op_Subtraction, OP_PSUBB, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
579 { SN_set_V0, 0, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
580 { SN_set_V1, 1, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
581 { SN_set_V10, 10, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
582 { SN_set_V11, 11, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
583 { SN_set_V12, 12, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
584 { SN_set_V13, 13, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
585 { SN_set_V14, 14, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
586 { SN_set_V15, 15, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
587 { SN_set_V2, 2, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
588 { SN_set_V3, 3, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
589 { SN_set_V4, 4, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
590 { SN_set_V5, 5, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
591 { SN_set_V6, 6, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
592 { SN_set_V7, 7, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
593 { SN_set_V8, 8, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
594 { SN_set_V9, 9, SIMD_VERSION_SSE1, SIMD_EMIT_SETTER },
/* Bitmask of SIMD versions supported by the CPU; set by mono_simd_intrinsics_init () and tested in emit_intrinsics (). */
597 static guint32 simd_supported_versions;
/* Forward declarations of static helpers. */
599 static MonoInst* emit_sys_numerics_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args);
600 static MonoInst* emit_sys_numerics_vectors_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args);
/*
 * Comparator passed to mono_binary_search () by emit_intrinsics ().
 * KEY is a method name string, VALUE points to a SimdIntrinsic table row;
 * the two are ordered with strcmp () on the row's method name.
 */
602 /*TODO match using number of parameters as well*/
604 simd_intrinsic_compare_by_name (const void *key, const void *value)
606 return strcmp (key, method_name (((SimdIntrinsic *)value)->name));
/* Per-vreg flag bits kept in the vreg_flags array by mono_simd_simplify_indirection () and its helpers. */
/* The vreg is zeroed by an OP_XZERO in the first bb and not otherwise touched there. */
611 VREG_HAS_XZERO_BB0 = 0x02,
/* Some other instruction in the first bb touches the vreg. */
612 VREG_HAS_OTHER_OP_BB0 = 0x04,
/* So far exactly one later bb uses the vreg (recorded in target_bb). */
613 VREG_SINGLE_BB_USE = 0x08,
/* More than one later bb uses the vreg. */
614 VREG_MANY_BB_USE = 0x10,
/*
 * Asks the architecture backend which SIMD versions are available and caches
 * the resulting bitmask in simd_supported_versions.
 */
618 mono_simd_intrinsics_init (void)
620 simd_supported_versions = mono_arch_cpu_enumerate_simd_versions ();
621 /*TODO log the supported flags*/
/*
 * First-bb helper for mono_simd_simplify_indirection ().
 * If REG is valid (not -1), within max_vreg and has any flag set, its
 * XZERO-only mark is cleared and VREG_HAS_OTHER_OP_BB0 is set, recording that
 * INS touches the vreg in the first bb.
 */
624 static inline gboolean
625 apply_vreg_first_block_interference (MonoCompile *cfg, MonoInst *ins, int reg, int max_vreg, char *vreg_flags)
627 if (reg != -1 && reg <= max_vreg && vreg_flags [reg]) {
628 vreg_flags [reg] &= ~VREG_HAS_XZERO_BB0;
629 vreg_flags [reg] |= VREG_HAS_OTHER_OP_BB0;
630 DEBUG (printf ("[simd-simplify] R%d used: ", reg); mono_print_ins(ins));
/*
 * Later-bb helper for mono_simd_simplify_indirection ().
 * The leading guard filters out REG when it is -1, beyond max_vreg, not marked
 * VREG_HAS_XZERO_BB0, or when BB is already the block recorded for it.
 * Otherwise: a vreg already marked single-bb is promoted to VREG_MANY_BB_USE;
 * a vreg with neither mark gets VREG_SINGLE_BB_USE and BB is stored in target_bb.
 */
636 static inline gboolean
637 apply_vreg_following_block_interference (MonoCompile *cfg, MonoInst *ins, int reg, MonoBasicBlock *bb, int max_vreg, char *vreg_flags, MonoBasicBlock **target_bb)
639 if (reg == -1 || reg > max_vreg || !(vreg_flags [reg] & VREG_HAS_XZERO_BB0) || target_bb [reg] == bb)
642 if (vreg_flags [reg] & VREG_SINGLE_BB_USE) {
643 vreg_flags [reg] &= ~VREG_SINGLE_BB_USE;
644 vreg_flags [reg] |= VREG_MANY_BB_USE;
645 DEBUG (printf ("[simd-simplify] R%d used by many bb: ", reg); mono_print_ins(ins));
647 } else if (!(vreg_flags [reg] & VREG_MANY_BB_USE)) {
648 vreg_flags [reg] |= VREG_SINGLE_BB_USE;
649 target_bb [reg] = bb;
650 DEBUG (printf ("[simd-simplify] R%d first used by: ", reg); mono_print_ins(ins));
657 This pass recalculates which vars need MONO_INST_INDIRECT.
659 We cannot do this for non SIMD vars since code like mono_get_vtable_var
660 uses MONO_INST_INDIRECT to signal that the variable must be stack allocated.
/*
 * Phases of this pass:
 *  1. Clear MONO_INST_INDIRECT on every SIMD var and compute the largest SIMD
 *     var vreg (max_vreg).
 *  2. Walk all instructions; SIMD vars targeted by an OP_LDADDR get
 *     MONO_INST_INDIRECT back. The first bb that has code becomes first_bb.
 *  3. Mark SIMD vars that are neither indirect nor volatile as VREG_USED, then
 *     scan first_bb to find vregs whose only op there is an OP_XZERO.
 *  4. Scan the remaining bbs to classify each such vreg as used by a single bb
 *     or by many.
 *  5. For single-bb vregs, insert an OP_XZERO into the using bb in front of the
 *     first instruction that reads the var (none is needed if the first touch
 *     is a pure definition). The last loop then visits the matching OP_XZEROs
 *     in first_bb (logged as "Nullify").
 *
 * NOTE(review): vreg_flags holds max_vreg + 1 entries but is indexed with
 * ins->dreg of any OP_XZERO without a range check; confirm such dregs cannot
 * exceed max_vreg.
 * NOTE(review): first_bb starts out NULL and is dereferenced below; confirm at
 * least one bb always has code.
 */
663 mono_simd_simplify_indirection (MonoCompile *cfg)
666 MonoBasicBlock *bb, *first_bb = NULL, **target_bb;
/* Phase 1: reset the indirect flag on SIMD vars and find the highest SIMD vreg. */
670 for (i = 0; i < cfg->num_varinfo; i++) {
671 MonoInst *var = cfg->varinfo [i];
672 if (var->klass->simd_type) {
673 var->flags &= ~MONO_INST_INDIRECT;
674 max_vreg = MAX (var->dreg, max_vreg);
/* Phase 2: vars whose address is taken must stay indirect. */
678 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
679 if (!first_bb && bb->code)
681 for (ins = bb->code; ins; ins = ins->next) {
682 if (ins->opcode == OP_LDADDR) {
683 MonoInst *var = (MonoInst*)ins->inst_p0;
684 if (var->klass->simd_type) {
685 var->flags |= MONO_INST_INDIRECT;
691 DEBUG (printf ("[simd-simplify] max vreg is %d\n", max_vreg));
/* Both side tables are indexed by vreg number, hence max_vreg + 1 zeroed entries. */
692 vreg_flags = (char *)g_malloc0 (max_vreg + 1);
693 target_bb = g_new0 (MonoBasicBlock*, max_vreg + 1);
/* Phase 3a: only non-indirect, non-volatile SIMD vars take part in the analysis. */
695 for (i = 0; i < cfg->num_varinfo; i++) {
696 MonoInst *var = cfg->varinfo [i];
697 if (var->klass->simd_type && !(var->flags & (MONO_INST_INDIRECT|MONO_INST_VOLATILE))) {
698 vreg_flags [var->dreg] = VREG_USED;
699 DEBUG (printf ("[simd-simplify] processing var %d with vreg %d\n", i, var->dreg));
703 /*Scan the first basic block looking xzeros not used*/
704 for (ins = first_bb->code; ins; ins = ins->next) {
706 int sregs [MONO_MAX_SRC_REGS];
708 if (ins->opcode == OP_XZERO) {
709 if (!(vreg_flags [ins->dreg] & VREG_HAS_OTHER_OP_BB0)) {
710 DEBUG (printf ("[simd-simplify] R%d has vzero: ", ins->dreg); mono_print_ins(ins));
711 vreg_flags [ins->dreg] |= VREG_HAS_XZERO_BB0;
/* Any other touch of the vreg (address taken, written or read) counts as interference. */
715 if (ins->opcode == OP_LDADDR && apply_vreg_first_block_interference (cfg, ins, ((MonoInst*)ins->inst_p0)->dreg, max_vreg, vreg_flags))
717 if (apply_vreg_first_block_interference (cfg, ins, ins->dreg, max_vreg, vreg_flags))
719 num_sregs = mono_inst_get_src_registers (ins, sregs);
720 for (i = 0; i < num_sregs; ++i) {
721 if (apply_vreg_first_block_interference (cfg, ins, sregs [i], max_vreg, vreg_flags))
/* Debug-only dump of the first-bb classification. */
726 if (IS_DEBUG_ON (cfg)) {
727 for (i = 0; i < cfg->num_varinfo; i++) {
728 MonoInst *var = cfg->varinfo [i];
729 if (var->klass->simd_type) {
730 if ((vreg_flags [var->dreg] & VREG_HAS_XZERO_BB0))
731 DEBUG (printf ("[simd-simplify] R%d has xzero only\n", var->dreg));
732 if ((vreg_flags [var->dreg] & VREG_HAS_OTHER_OP_BB0))
733 DEBUG (printf ("[simd-simplify] R%d has other ops on bb0\n", var->dreg));
738 /*TODO stop here if no var is xzero only*/
741 Scan all other bb and check if it has only one other use
742 Ideally this would be done after an extended bb formation pass
744 FIXME This pass could use dominator information to properly
745 place the XZERO on the bb that dominates all uses of the var,
746 but this will have zero effect with the current local reg alloc
748 TODO simply the use of flags.
751 for (bb = first_bb->next_bb; bb; bb = bb->next_bb) {
752 for (ins = bb->code; ins; ins = ins->next) {
754 int sregs [MONO_MAX_SRC_REGS];
756 if (ins->opcode == OP_LDADDR && apply_vreg_following_block_interference (cfg, ins, ((MonoInst*)ins->inst_p0)->dreg, bb, max_vreg, vreg_flags, target_bb))
758 if (apply_vreg_following_block_interference (cfg, ins, ins->dreg, bb, max_vreg, vreg_flags, target_bb))
760 num_sregs = mono_inst_get_src_registers (ins, sregs);
761 for (i = 0; i < num_sregs; ++i) {
762 if (apply_vreg_following_block_interference (cfg, ins, sregs [i], bb,
763 max_vreg, vreg_flags, target_bb))
/* Phase 5: for each SIMD var used by exactly one later bb, zero it there instead. */
769 for (i = 0; i < cfg->num_varinfo; i++) {
770 MonoInst *var = cfg->varinfo [i];
771 if (!var->klass->simd_type)
773 if ((vreg_flags [var->dreg] & VREG_SINGLE_BB_USE))
774 DEBUG (printf ("[simd-simplify] R%d has single bb use\n", var->dreg));
775 if ((vreg_flags [var->dreg] & VREG_MANY_BB_USE))
776 DEBUG (printf ("[simd-simplify] R%d has many bb in use\n", var->dreg));
778 if (!(vreg_flags [var->dreg] & VREG_SINGLE_BB_USE))
780 for (ins = target_bb [var->dreg]->code; ins; ins = ins->next) {
782 int sregs [MONO_MAX_SRC_REGS];
783 gboolean found = FALSE;
/* found: this instruction reads the var's vreg. */
785 num_sregs = mono_inst_get_src_registers (ins, sregs);
786 for (j = 0; j < num_sregs; ++j) {
787 if (sregs [j] == var->dreg)
790 /*We can avoid inserting the XZERO if the first use doesn't depend on the zero'ed value.*/
791 if (ins->dreg == var->dreg && !found) {
792 DEBUG (printf ("[simd-simplify] INGORING R%d on BB %d because first op is a def", i, target_bb [var->dreg]->block_num););
795 DEBUG (printf ("[simd-simplify] Adding XZERO for R%d on BB %d: ", i, target_bb [var->dreg]->block_num); );
797 MONO_INST_NEW (cfg, tmp, OP_XZERO);
798 tmp->dreg = var->dreg;
799 tmp->type = STACK_VTYPE;
800 tmp->klass = var->klass;
801 mono_bblock_insert_before_ins (target_bb [var->dreg], ins, tmp);
/* The first-bb XZEROs of single-bb vregs have been superseded by the ones inserted above. */
807 for (ins = first_bb->code; ins; ins = ins->next) {
808 if (ins->opcode == OP_XZERO && (vreg_flags [ins->dreg] & VREG_SINGLE_BB_USE)) {
809 DEBUG (printf ("[simd-simplify] Nullify %d on first BB: ", ins->dreg); mono_print_ins(ins));
819 * This function expects src to be a value.
/*
 * Finds the SIMD vreg that holds the value produced by SRC.
 * Recognised producers are OP_XMOVE, any opcode whose destination spec is 'x',
 * and OP_VCALL. Anything else emits a warning, prints SRC and asserts.
 */
822 get_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src)
824 const char *spec = INS_INFO (src->opcode);
826 if (src->opcode == OP_XMOVE) {
828 } else if (spec [MONO_INST_DEST] == 'x') {
830 } else if (src->opcode == OP_VCALL) {
834 g_warning ("get_simd_vreg:: could not infer source simd vreg for op");
835 mono_print_ins (src);
836 g_assert_not_reached ();
840 * This function will load the value if needed.
/*
 * Returns a SIMD vreg for SRC, which may be a value or an address:
 *  - OP_XMOVE and opcodes with an 'x' destination are values already;
 *  - OP_LDADDR resolves to the dreg of the addressed variable;
 *  - STACK_PTR / STACK_MP sources are loaded through an OP_LOADX_MEMBASE based
 *    on src->dreg.
 * Any other source emits a warning, prints SRC and asserts.
 * INDIRECT is an optional out flag; several callers pass NULL.
 * NOTE(review): presumably it reports that the value was loaded through a
 * pointer and must be stored back; confirm against the assignments.
 */
843 load_simd_vreg_class (MonoCompile *cfg, MonoClass *klass, MonoInst *src, gboolean *indirect)
845 const char *spec = INS_INFO (src->opcode);
849 if (src->opcode == OP_XMOVE) {
851 } else if (src->opcode == OP_LDADDR) {
852 int res = ((MonoInst*)src->inst_p0)->dreg;
855 } else if (spec [MONO_INST_DEST] == 'x') {
857 } else if (src->type == STACK_PTR || src->type == STACK_MP) {
862 MONO_INST_NEW (cfg, ins, OP_LOADX_MEMBASE);
864 ins->sreg1 = src->dreg;
865 ins->type = STACK_VTYPE;
866 ins->dreg = alloc_ireg (cfg);
867 MONO_ADD_INS (cfg->cbb, ins);
870 g_warning ("load_simd_vreg:: could not infer source simd (%d) vreg for op", src->type);
871 mono_print_ins (src);
872 g_assert_not_reached ();
/* Convenience wrapper: load_simd_vreg_class () with the class that declares CMETHOD. */
876 load_simd_vreg (MonoCompile *cfg, MonoMethod *cmethod, MonoInst *src, gboolean *indirect)
878 return load_simd_vreg_class (cfg, cmethod->klass, src, indirect);
881 /*We share the var with fconv_to_r8_x to save some stack space.*/
/*
 * Returns cfg->fconv_to_r8_x_var, creating it on first use as a local of type
 * double flagged MONO_INST_VOLATILE.
 */
883 get_double_spill_area (MonoCompile *cfg)
885 if (!cfg->fconv_to_r8_x_var) {
886 cfg->fconv_to_r8_x_var = mono_compile_create_var (cfg, &mono_defaults.double_class->byval_arg, OP_LOCAL);
887 cfg->fconv_to_r8_x_var->flags |= MONO_INST_VOLATILE; /*FIXME, use the don't regalloc flag*/
889 return cfg->fconv_to_r8_x_var;
/*
 * Returns cfg->simd_ctor_var, creating it on first use as a volatile local of
 * type AVECTOR_KLASS. The var is created once per cfg, so later calls get the
 * same var regardless of the class they pass.
 */
892 get_simd_ctor_spill_area (MonoCompile *cfg, MonoClass *avector_klass)
894 if (!cfg->simd_ctor_var) {
895 cfg->simd_ctor_var = mono_compile_create_var (cfg, &avector_klass->byval_arg, OP_LOCAL);
896 cfg->simd_ctor_var->flags |= MONO_INST_VOLATILE; /*FIXME, use the don't regalloc flag*/
898 return cfg->simd_ctor_var;
/*
 * Switches on the element type to pick the opcode that broadcasts a scalar
 * into a vector; callers compare the result against OP_EXPAND_R4 and
 * OP_EXPAND_R8. Unsupported types hit g_assert_not_reached ().
 */
902 mono_type_to_expand_op (MonoType *type)
904 switch (type->type) {
922 g_assert_not_reached ();
/*
 * Family of helpers taking an element MonoType. By their names they select
 * the packed compare, greater-than, add, sub, mul and div opcode for it.
 * NOTE(review): confirm which element types each helper accepts and which
 * ones assert on unsupported input.
 */
927 type_to_comp_op (MonoType *t)
947 g_assert_not_reached ();
953 type_to_gt_op (MonoType *t)
970 type_to_padd_op (MonoType *t)
996 type_to_psub_op (MonoType *t)
1022 type_to_pmul_op (MonoType *t)
1036 /* PMULQ multiplies two 32 bit numbers into a 64 bit one */
1047 type_to_pdiv_op (MonoType *t)
/*
 * Returns a SIMD vreg for SRC. If PARAM_TYPE is itself a SIMD type the vreg
 * comes from get_simd_vreg (); otherwise SRC is a scalar and an expand opcode
 * chosen by mono_type_to_expand_op () is emitted on src->dreg. R4 and R8
 * expands also get a spill var attached.
 */
1061 get_simd_vreg_or_expanded_scalar (MonoCompile *cfg, MonoClass *klass, MonoType *param_type, MonoInst *src)
1066 if (mono_class_from_mono_type (param_type)->simd_type)
1067 return get_simd_vreg (cfg, NULL, src);
1069 expand_op = mono_type_to_expand_op (param_type);
1070 MONO_INST_NEW (cfg, ins, expand_op);
1072 ins->sreg1 = src->dreg;
1073 ins->type = STACK_VTYPE;
1074 ins->dreg = alloc_ireg (cfg);
1075 MONO_ADD_INS (cfg->cbb, ins);
1077 if (expand_op == OP_EXPAND_R4)
1078 ins->backend.spill_var = mini_get_int_to_float_spill_area (cfg);
1079 else if (expand_op == OP_EXPAND_R8)
1080 ins->backend.spill_var = get_double_spill_area (cfg);
1086 * simd_intrinsic_emit_binary_op:
1088 * Emit a binary SIMD opcode.
1089 * @LHS/@RHS are the two arguments, they can be either a SIMD type or a scalar one. Scalar arguments are
1090 * expanded to the SIMD type.
/*
 * Both operands go through get_simd_vreg_or_expanded_scalar (), so a scalar
 * side is expanded first. OPCODE is then emitted with those two vregs as
 * sources, a new STACK_VTYPE destination and FLAGS stored in inst_c0.
 */
1093 simd_intrinsic_emit_binary_op (MonoCompile *cfg, int opcode, int flags, MonoClass *klass, MonoType *lhs_type, MonoType *rhs_type, MonoInst *lhs, MonoInst *rhs)
1096 int left_vreg, right_vreg;
1098 left_vreg = get_simd_vreg_or_expanded_scalar (cfg, klass, lhs_type, lhs);
1099 right_vreg = get_simd_vreg_or_expanded_scalar (cfg, klass, rhs_type, rhs);
1101 MONO_INST_NEW (cfg, ins, opcode);
1103 ins->sreg1 = left_vreg;
1104 ins->sreg2 = right_vreg;
1105 ins->type = STACK_VTYPE;
1106 ins->dreg = alloc_ireg (cfg);
1107 ins->inst_c0 = flags;
1108 MONO_ADD_INS (cfg->cbb, ins);
/*
 * Table-driven entry point for SIMD_EMIT_BINARY: asserts that CMETHOD takes
 * exactly two parameters and forwards the intrinsic's opcode and flags, the
 * parameter types and both arguments to simd_intrinsic_emit_binary_op ().
 */
1113 simd_intrinsic_emit_binary (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1115 MonoMethodSignature *sig = mono_method_signature (cmethod);
1117 g_assert (sig->param_count == 2);
1119 return simd_intrinsic_emit_binary_op (cfg, intrinsic->opcode, intrinsic->flags, cmethod->klass, sig->params [0], sig->params [1], args [0], args [1]);
/*
 * Emits the intrinsic's opcode for a one-operand SIMD operation: the operand
 * vreg comes from args [0] and the result is a new STACK_VTYPE vreg.
 */
1123 simd_intrinsic_emit_unary (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1128 vreg = get_simd_vreg (cfg, cmethod, args [0]);
1130 MONO_INST_NEW (cfg, ins, intrinsic->opcode);
1131 ins->klass = cmethod->klass;
1133 ins->type = STACK_VTYPE;
1134 ins->dreg = alloc_ireg (cfg);
1135 MONO_ADD_INS (cfg->cbb, ins);
/*
 * Maps an element type to the matching extract opcode (OP_EXTRACT_I1, _U1,
 * _I2, _U2 or _I4). Unsupported types hit g_assert_not_reached ().
 */
1140 mono_type_to_extract_op (MonoType *type)
1142 switch (type->type) {
1144 return OP_EXTRACT_I1;
1146 return OP_EXTRACT_U1;
1148 return OP_EXTRACT_I2;
1150 return OP_EXTRACT_U2;
1154 return OP_EXTRACT_I4;
1156 g_assert_not_reached ();
1160 /*Returns the amount to shift the element index to get the dword it belongs to*/
/* Switches on the element type; unsupported types hit g_assert_not_reached (). */
1162 mono_type_elements_shift_bits (MonoType *type)
1164 switch (type->type) {
1176 g_assert_not_reached ();
/*
 * Maps an element type to the matching insert opcode (OP_INSERT_I1, _I2, _I4,
 * _I8, _R4 or _R8). Unsupported types hit g_assert_not_reached ().
 * Used by the LLVM path of simd_intrinsic_emit_setter ().
 */
1180 static G_GNUC_UNUSED int
1181 mono_type_to_insert_op (MonoType *type)
1183 switch (type->type) {
1186 return OP_INSERT_I1;
1189 return OP_INSERT_I2;
1192 return OP_INSERT_I4;
1195 return OP_INSERT_I8;
1197 return OP_INSERT_R4;
1199 return OP_INSERT_R8;
1201 g_assert_not_reached ();
/*
 * Insert opcode selection for the non-LLVM path of
 * simd_intrinsic_emit_setter (). Most element types map to an
 * OP_INSERTX_*_SLOW opcode; one case returns the plain OP_INSERT_I2.
 * Unsupported types hit g_assert_not_reached ().
 */
1206 mono_type_to_slow_insert_op (MonoType *type)
1208 switch (type->type) {
1211 return OP_INSERTX_U1_SLOW;
1214 return OP_INSERT_I2;
1217 return OP_INSERTX_I4_SLOW;
1220 return OP_INSERTX_I8_SLOW;
1222 return OP_INSERTX_R4_SLOW;
1224 return OP_INSERTX_R8_SLOW;
1226 g_assert_not_reached ();
/*
 * Emits IR for an element setter (set_Vn). intrinsic->opcode is the element
 * index; args [0] is the vector (or its address) and args [1] the new value.
 *  - LLVM: one insert op chosen by mono_type_to_insert_op ().
 *  - Element size 2, 4 or 8: one op chosen by mono_type_to_slow_insert_op ();
 *    R4/R8 values also need a spill var.
 *  - Otherwise: OP_EXTRACTX_U2 fetches the 16-bit word holding the element
 *    (index / 2), then OP_INSERTX_U1_SLOW writes the byte.
 * An OP_STOREX_MEMBASE to args [0]->dreg follows.
 * NOTE(review): presumably it is emitted only when load_simd_vreg () reported
 * an indirect source; confirm the guarding condition.
 */
1231 simd_intrinsic_emit_setter (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1234 MonoMethodSignature *sig = mono_method_signature (cmethod);
1239 size = mono_type_size (sig->params [0], &align);
1241 if (COMPILE_LLVM (cfg)) {
1242 MONO_INST_NEW (cfg, ins, mono_type_to_insert_op (sig->params [0]));
1243 ins->klass = cmethod->klass;
1244 ins->dreg = ins->sreg1 = dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);
1245 ins->sreg2 = args [1]->dreg;
1246 ins->inst_c0 = intrinsic->opcode;
1247 MONO_ADD_INS (cfg->cbb, ins);
1248 } else if (size == 2 || size == 4 || size == 8) {
1249 MONO_INST_NEW (cfg, ins, mono_type_to_slow_insert_op (sig->params [0]));
1250 ins->klass = cmethod->klass;
1251 /*This is a partial load so we encode the dependency on the previous value by setting dreg and sreg1 to the same value.*/
1252 ins->dreg = ins->sreg1 = dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);
1253 ins->sreg2 = args [1]->dreg;
1254 ins->inst_c0 = intrinsic->opcode;
1255 if (sig->params [0]->type == MONO_TYPE_R4)
1256 ins->backend.spill_var = mini_get_int_to_float_spill_area (cfg);
1257 else if (sig->params [0]->type == MONO_TYPE_R8)
1258 ins->backend.spill_var = get_double_spill_area (cfg);
1259 MONO_ADD_INS (cfg->cbb, ins);
/* Remaining element sizes: fetch the containing 16-bit word, then insert the byte. */
1263 MONO_INST_NEW (cfg, ins, OP_EXTRACTX_U2);
1264 ins->klass = cmethod->klass;
1265 ins->sreg1 = sreg = dreg = load_simd_vreg (cfg, cmethod, args [0], &indirect);
1266 ins->type = STACK_I4;
1267 ins->dreg = vreg = alloc_ireg (cfg);
1268 ins->inst_c0 = intrinsic->opcode / 2;
1269 MONO_ADD_INS (cfg->cbb, ins);
1271 MONO_INST_NEW (cfg, ins, OP_INSERTX_U1_SLOW);
1272 ins->klass = cmethod->klass;
1274 ins->sreg2 = args [1]->dreg;
1276 ins->inst_c0 = intrinsic->opcode;
1277 MONO_ADD_INS (cfg->cbb, ins);
/* Write the updated vector back through the address in args [0]. */
1281 MONO_INST_NEW (cfg, ins, OP_STOREX_MEMBASE);
1282 ins->klass = cmethod->klass;
1283 ins->dreg = args [0]->dreg;
1285 MONO_ADD_INS (cfg->cbb, ins);
1291 * simd_intrinsic_emit_getter_op:
1293 * Emit IR for loading an element of a SIMD value.
1295 * @klass is the simd type, @type is the element type.
/*
 * 64-bit elements (I8, U8, R8) use OP_EXTRACT_I8 / OP_EXTRACT_R8 with INDEX in
 * inst_c0; R8 results go to a float reg and need the double spill area.
 * Smaller elements: shift_bits comes from mono_type_elements_shift_bits ().
 * Without LLVM, a non-zero (index >> shift_bits) first emits an OP_PSHUFLED
 * with that value in inst_c0. The extract op then uses
 * index & ((1 << shift_bits) - 1); with LLVM it uses INDEX unchanged.
 * R4 elements get one more op to move the extracted bits into a float reg.
 */
1298 simd_intrinsic_emit_getter_op (MonoCompile *cfg, int index, MonoClass *klass, MonoType *type, MonoInst *arg)
1301 int vreg, shift_bits;
1303 vreg = load_simd_vreg_class (cfg, klass, arg, NULL);
1305 if (type->type == MONO_TYPE_I8 || type->type == MONO_TYPE_U8 || type->type == MONO_TYPE_R8) {
1307 gboolean is_r8 = type->type == MONO_TYPE_R8;
1309 MONO_INST_NEW (cfg, ins, is_r8 ? OP_EXTRACT_R8 : OP_EXTRACT_I8);
1312 ins->inst_c0 = index;
1314 ins->type = STACK_R8;
1315 ins->dreg = alloc_freg (cfg);
1316 ins->backend.spill_var = get_double_spill_area (cfg);
1318 ins->type = STACK_I8;
1319 ins->dreg = alloc_lreg (cfg);
1321 MONO_ADD_INS (cfg->cbb, ins);
1325 shift_bits = mono_type_elements_shift_bits (type);
1327 if ((index >> shift_bits) && !cfg->compile_llvm) {
1328 MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
1331 ins->inst_c0 = index >> shift_bits;
1332 ins->type = STACK_VTYPE;
1333 ins->dreg = vreg = alloc_ireg (cfg);
1334 MONO_ADD_INS (cfg->cbb, ins);
1337 MONO_INST_NEW (cfg, ins, mono_type_to_extract_op (type));
1340 ins->type = STACK_I4;
1341 ins->dreg = vreg = alloc_ireg (cfg);
1342 if (cfg->compile_llvm)
1343 ins->inst_c0 = index;
1345 ins->inst_c0 = index & ((1 << shift_bits) - 1);
1346 MONO_ADD_INS (cfg->cbb, ins);
/* Floats are extracted as raw 32-bit integers and then moved to a float reg. */
1348 if (type->type == MONO_TYPE_R4) {
1349 MONO_INST_NEW (cfg, ins, cfg->r4fp ? OP_ICONV_TO_R4_RAW : OP_MOVE_I4_TO_F);
1350 ins->klass = mono_defaults.single_class;
1352 ins->type = cfg->r4_stack_type;
1353 ins->dreg = alloc_freg (cfg);
1354 ins->backend.spill_var = mini_get_int_to_float_spill_area (cfg);
1355 MONO_ADD_INS (cfg->cbb, ins);
/*
 * Table-driven entry point for SIMD_EMIT_GETTER: intrinsic->opcode is the
 * element index and the method's return type is the element type.
 */
1361 simd_intrinsic_emit_getter (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1363 MonoMethodSignature *sig = mono_method_signature (cmethod);
1365 return simd_intrinsic_emit_getter_op (cfg, intrinsic->opcode, cmethod->klass, sig->ret, args [0]);
/*
 * Getter for 64-bit elements: emits OP_EXTRACT_R8 when the method returns R8,
 * OP_EXTRACT_I8 otherwise, with the element index (intrinsic->opcode) in
 * inst_c0. R8 results go to a float reg and need the double spill area; other
 * results go to a long reg.
 */
1369 simd_intrinsic_emit_long_getter (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1373 gboolean is_r8 = mono_method_signature (cmethod)->ret->type == MONO_TYPE_R8;
1375 vreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
1377 MONO_INST_NEW (cfg, ins, is_r8 ? OP_EXTRACT_R8 : OP_EXTRACT_I8);
1378 ins->klass = cmethod->klass;
1380 ins->inst_c0 = intrinsic->opcode;
1382 ins->type = STACK_R8;
1383 ins->dreg = alloc_freg (cfg);
1384 ins->backend.spill_var = get_double_spill_area (cfg);
1386 ins->type = STACK_I8;
1387 ins->dreg = alloc_lreg (cfg);
1389 MONO_ADD_INS (cfg->cbb, ins);
/*
 * Emits IR for a SIMD constructor. args [0] is the destination (an OP_LDADDR
 * of a local, or a pointer); args [1..] are the element values.
 *  - One parameter: the scalar in args [1] is expanded across the vector with
 *    intrinsic->opcode or mono_type_to_expand_op (); R4/R8 need a spill var.
 *    When the destination is a pointer, the result is written with
 *    OP_STOREX_MEMBASE.
 *  - Several parameters: each value is stored with store_op at
 *    addr_reg + i * arg_size. addr_reg is the ctor spill area for the LDADDR
 *    case, else args [0]->dreg. If the values cover less than 16 bytes the
 *    rest is zero-filled. For the LDADDR case the LDADDR is nullified and the
 *    vector is loaded from the spill area with OP_LOADX_MEMBASE.
 */
1395 simd_intrinsic_emit_ctor (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1397 MonoInst *ins = NULL;
1399 gboolean is_ldaddr = args [0]->opcode == OP_LDADDR;
1400 MonoMethodSignature *sig = mono_method_signature (cmethod);
1401 int store_op = mono_type_to_store_membase (cfg, sig->params [0]);
/* i only receives the alignment out-param here; it is reused as a loop index below. */
1402 int arg_size = mono_type_size (sig->params [0], &i);
1405 if (sig->param_count == 1) {
1409 dreg = args [0]->inst_i0->dreg;
1410 NULLIFY_INS (args [0]);
1412 g_assert (args [0]->type == STACK_MP || args [0]->type == STACK_PTR);
1413 dreg = alloc_ireg (cfg);
1417 opcode = intrinsic->opcode;
1419 opcode = mono_type_to_expand_op (sig->params [0]);
1420 MONO_INST_NEW (cfg, ins, opcode);
1421 ins->klass = cmethod->klass;
1422 ins->sreg1 = args [1]->dreg;
1423 ins->type = STACK_VTYPE;
1426 MONO_ADD_INS (cfg->cbb, ins);
1427 if (sig->params [0]->type == MONO_TYPE_R4)
1428 ins->backend.spill_var = mini_get_int_to_float_spill_area (cfg);
1429 else if (sig->params [0]->type == MONO_TYPE_R8)
1430 ins->backend.spill_var = get_double_spill_area (cfg);
1433 MONO_INST_NEW (cfg, ins, OP_STOREX_MEMBASE);
1434 ins->dreg = args [0]->dreg;
1436 MONO_ADD_INS (cfg->cbb, ins);
/* Multi-argument form: build the vector in memory, one element store at a time. */
1442 NEW_VARLOADA (cfg, ins, get_simd_ctor_spill_area (cfg, cmethod->klass), &cmethod->klass->byref_arg);
1443 MONO_ADD_INS (cfg->cbb, ins);
1444 addr_reg = ins->dreg;
1446 g_assert (args [0]->type == STACK_MP || args [0]->type == STACK_PTR);
1447 addr_reg = args [0]->dreg;
1450 for (i = sig->param_count - 1; i >= 0; --i) {
1451 EMIT_NEW_STORE_MEMBASE (cfg, ins, store_op, addr_reg, i * arg_size, args [i + 1]->dreg);
1454 if (sig->param_count * arg_size < 16) {
1455 /* If there are not enough arguments, fill the rest with 0s */
1456 for (i = sig->param_count; i < 16 / arg_size; ++i) {
1459 MONO_EMIT_NEW_STORE_MEMBASE_IMM (cfg, OP_STOREI4_MEMBASE_IMM, addr_reg, i * arg_size, 0);
1462 g_assert_not_reached ();
1468 if (is_ldaddr) { /*Eliminate LDADDR if it's initing a local var*/
1469 int vreg = ((MonoInst*)args [0]->inst_p0)->dreg;
1470 NULLIFY_INS (args [0]);
1472 MONO_INST_NEW (cfg, ins, OP_LOADX_MEMBASE);
1473 ins->klass = cmethod->klass;
1474 ins->sreg1 = addr_reg;
1475 ins->type = STACK_VTYPE;
1477 MONO_ADD_INS (cfg->cbb, ins);
/*
 * Emits an explicit cast between SIMD types as an OP_XMOVE of args [0]'s vreg
 * into a new vreg. For inflated methods the result class is taken from the
 * method's return type, otherwise from cmethod->klass.
 */
1483 simd_intrinsic_emit_cast (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1489 vreg = get_simd_vreg (cfg, cmethod, args [0]);
1491 if (cmethod->is_inflated)
1493 klass = mono_class_from_mono_type (mono_method_signature (cmethod)->ret);
1495 klass = cmethod->klass;
1497 MONO_INST_NEW (cfg, ins, OP_XMOVE);
1499 ins->type = STACK_VTYPE;
1501 ins->dreg = alloc_ireg (cfg);
1502 MONO_ADD_INS (cfg->cbb, ins);
/*
 * Emits a SIMD shift of args [0] by the count in args [1].
 *  - Constant count (OP_ICONST): the value goes into inst_imm and the ICONST
 *    instruction is nullified.
 *  - Variable count: it is first moved with OP_ICONV_TO_X, and opcode + 1 (the
 *    shift-by-register variant) is emitted instead.
 */
1507 simd_intrinsic_emit_shift (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1510 int vreg, vreg2 = -1, opcode = intrinsic->opcode;
1512 vreg = get_simd_vreg (cfg, cmethod, args [0]);
1514 if (args [1]->opcode != OP_ICONST) {
1515 MONO_INST_NEW (cfg, ins, OP_ICONV_TO_X);
1516 ins->klass = mono_defaults.int32_class;
1517 ins->sreg1 = args [1]->dreg;
1518 ins->type = STACK_I4;
1519 ins->dreg = vreg2 = alloc_ireg (cfg);
1520 MONO_ADD_INS (cfg->cbb, ins);
1522 ++opcode; /*The shift_reg version op is always +1 from the regular one.*/
1525 MONO_INST_NEW (cfg, ins, opcode);
1526 ins->klass = cmethod->klass;
1530 if (args [1]->opcode == OP_ICONST) {
1531 ins->inst_imm = args [1]->inst_c0;
1532 NULLIFY_INS (args [1]);
1535 ins->type = STACK_VTYPE;
1536 ins->dreg = alloc_ireg (cfg);
1537 MONO_ADD_INS (cfg->cbb, ins);
/*
 * TRUE when OP lies in the range OP_PCMPEQB..OP_PCMPEQQ. This relies on the
 * packed compare-equal opcodes being numbered contiguously.
 */
1541 static inline gboolean
1542 mono_op_is_packed_compare (int op)
1544 return op >= OP_PCMPEQB && op <= OP_PCMPEQQ;
/*
 * Emits a whole-vector == / != test that yields a scalar:
 *  1. run the element-wise compare OPCODE (FLAGS goes into inst_c0);
 *  2. collapse the result to a bitmask with OP_EXTRACT_MASK;
 *  3. for packed integer compares, or for SIMD_COMP_EQ, compare the mask
 *     against 0xFFFF (OP_CEQ for equality, OP_CLT_UN for inequality);
 *     otherwise compare it against 0 with OP_CGT_UN.
 * NOTE(review): ins->klass is assigned twice on the compare instruction; the
 * second assignment is redundant.
 */
1548 simd_intrinsic_emit_equality_op (MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args, int opcode, int flags)
1551 int left_vreg, right_vreg, tmp_vreg;
1553 left_vreg = load_simd_vreg (cfg, cmethod, args [0], NULL);
1554 right_vreg = get_simd_vreg (cfg, cmethod, args [1]);
1556 MONO_INST_NEW (cfg, ins, opcode);
1557 ins->klass = cmethod->klass;
1558 ins->sreg1 = left_vreg;
1559 ins->sreg2 = right_vreg;
1560 ins->type = STACK_VTYPE;
1561 ins->klass = cmethod->klass;
1562 ins->dreg = tmp_vreg = alloc_ireg (cfg);
1563 ins->inst_c0 = flags;
1564 MONO_ADD_INS (cfg->cbb, ins);
1566 /*FIXME the next ops are SSE specific*/
1567 MONO_INST_NEW (cfg, ins, OP_EXTRACT_MASK);
1568 ins->klass = cmethod->klass;
1569 ins->sreg1 = tmp_vreg;
1570 ins->type = STACK_I4;
1571 ins->dreg = tmp_vreg = alloc_ireg (cfg);
1572 MONO_ADD_INS (cfg->cbb, ins);
1574 /*FP ops have a not equal instruction, which means that we must test the results with OR semantics.*/
1575 if (mono_op_is_packed_compare (opcode) || flags == SIMD_COMP_EQ) {
1576 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, tmp_vreg, 0xFFFF);
1577 NEW_UNALU (cfg, ins, flags == SIMD_COMP_EQ ? OP_CEQ : OP_CLT_UN, tmp_vreg, -1);
1579 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, tmp_vreg, 0);
1580 NEW_UNALU (cfg, ins, OP_CGT_UN, tmp_vreg, -1);
1582 MONO_ADD_INS (cfg->cbb, ins);
/* Table-driven entry point for SIMD_EMIT_EQUALITY: forwards the intrinsic's opcode and flags to simd_intrinsic_emit_equality_op (). */
1587 simd_intrinsic_emit_equality (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1589 return simd_intrinsic_emit_equality_op (cfg, cmethod, args, intrinsic->opcode, intrinsic->flags);
/*
 * Emits a shuffle. The selector is the last argument and must be a literal
 * (OP_ICONST); it is copied into inst_c0 and the ICONST is nullified.
 * With three parameters a second source vreg is taken from args [1], and an
 * OP_PSHUFLED opcode is rewritten to OP_SHUFPS.
 * NOTE(review): inst_c0 is read from the selector instruction after
 * NULLIFY_INS; this assumes NULLIFY_INS leaves inst_c0 intact. Confirm.
 */
1593 simd_intrinsic_emit_shuffle (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1596 int vreg, vreg2 = -1;
1597 int param_count = mono_method_signature (cmethod)->param_count;
1599 if (args [param_count - 1]->opcode != OP_ICONST) {
1600 /*TODO Shuffle with non literals is not yet supported */
1604 vreg = get_simd_vreg (cfg, cmethod, args [0]);
1605 if (param_count == 3)
1606 vreg2 = get_simd_vreg (cfg, cmethod, args [1]);
1608 NULLIFY_INS (args [param_count - 1]);
1611 MONO_INST_NEW (cfg, ins, intrinsic->opcode);
1612 ins->klass = cmethod->klass;
1615 ins->inst_c0 = args [param_count - 1]->inst_c0;
1616 ins->type = STACK_VTYPE;
1617 ins->dreg = alloc_ireg (cfg);
1618 MONO_ADD_INS (cfg->cbb, ins);
1620 if (param_count == 3 && ins->opcode == OP_PSHUFLED)
1621 ins->opcode = OP_SHUFPS;
/* Emits OP_LOADX_ALIGNED_MEMBASE reading from the address in args [0]->dreg into a new STACK_VTYPE vreg. */
1626 simd_intrinsic_emit_load_aligned (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1630 MONO_INST_NEW (cfg, ins, OP_LOADX_ALIGNED_MEMBASE);
1631 ins->klass = cmethod->klass;
1632 ins->sreg1 = args [0]->dreg;
1633 ins->type = STACK_VTYPE;
1634 ins->dreg = alloc_ireg (cfg);
1635 MONO_ADD_INS (cfg->cbb, ins);
/*
 * Emits the intrinsic's store opcode: args [0] supplies the destination
 * address (placed in dreg, as membase stores expect) and args [1] the SIMD
 * value to store.
 */
1640 simd_intrinsic_emit_store (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1645 vreg = get_simd_vreg (cfg, cmethod, args [1]);
1647 MONO_INST_NEW (cfg, ins, intrinsic->opcode);
1648 ins->klass = cmethod->klass;
1649 ins->dreg = args [0]->dreg;
1651 ins->type = STACK_VTYPE;
1652 MONO_ADD_INS (cfg->cbb, ins);
/* Emits OP_EXTRACT_MASK on the SIMD vreg of args [0]; the result is a new STACK_I4 vreg. */
1657 simd_intrinsic_emit_extract_mask (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1662 vreg = get_simd_vreg (cfg, cmethod, args [0]);
1664 MONO_INST_NEW (cfg, ins, OP_EXTRACT_MASK);
1665 ins->klass = cmethod->klass;
1667 ins->type = STACK_I4;
1668 ins->dreg = alloc_ireg (cfg);
1669 MONO_ADD_INS (cfg->cbb, ins);
/*
 * Emits OP_PREFETCH_MEMBASE for the address in args [0]->dreg. The prefetch
 * mode comes from the table row's flags and is passed in backend.arg_info.
 */
1675 simd_intrinsic_emit_prefetch (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1679 MONO_INST_NEW (cfg, ins, OP_PREFETCH_MEMBASE);
1680 ins->klass = cmethod->klass;
1681 ins->sreg1 = args [0]->dreg;
1682 ins->backend.arg_info = intrinsic->flags;
1683 MONO_ADD_INS (cfg->cbb, ins);
/* Emits the intrinsic's opcode with no source operands; the result is a new STACK_VTYPE xreg. */
1688 simd_intrinsic_emit_const (const SimdIntrinsic *intrinsic, MonoCompile *cfg, MonoMethod *cmethod, MonoInst **args)
1692 MONO_INST_NEW (cfg, ins, intrinsic->opcode);
1693 ins->klass = cmethod->klass;
1694 ins->type = STACK_VTYPE;
1695 ins->dreg = alloc_xreg (cfg);
1696 MONO_ADD_INS (cfg->cbb, ins);
/*
 * Switches over the SIMD_VERSION_* flag values. emit_intrinsics () prints the
 * result with "%s" when reporting unsupported instruction sets.
 */
1701 simd_version_name (guint32 version)
1704 case SIMD_VERSION_SSE1:
1706 case SIMD_VERSION_SSE2:
1708 case SIMD_VERSION_SSE3:
1710 case SIMD_VERSION_SSSE3:
1712 case SIMD_VERSION_SSE41:
1714 case SIMD_VERSION_SSE42:
1716 case SIMD_VERSION_SSE4a:
/*
 * Central dispatcher for table-driven SIMD intrinsics.
 *  1. Binary-searches INTRINSICS (SIZE rows) for cmethod->name.
 *  2. If the row requires SIMD versions and none of them is present in
 *     simd_supported_versions, the intrinsic is not used (debug builds print
 *     the missing instruction sets).
 *  3. Otherwise dispatches on simd_emit_mode to the matching emitter; an
 *     unknown mode hits g_assert_not_reached ().
 * NOTE(review): the version-printing loop starts at bit 1; confirm bit 0 is
 * not a valid version flag, otherwise that version is never printed.
 */
1723 emit_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args, const SimdIntrinsic *intrinsics, guint32 size)
1725 const SimdIntrinsic *result = (const SimdIntrinsic *)mono_binary_search (cmethod->name, intrinsics, size, sizeof (SimdIntrinsic), &simd_intrinsic_compare_by_name);
1727 DEBUG (printf ("function doesn't have a simd intrinsic %s::%s/%d\n", cmethod->klass->name, cmethod->name, fsig->param_count));
1730 if (IS_DEBUG_ON (cfg)) {
1732 printf ("found call to intrinsic %s::%s/%d -> %s\n", cmethod->klass->name, cmethod->name, fsig->param_count, method_name (result->name));
1733 max = fsig->param_count + fsig->hasthis;
1734 for (i = 0; i < max; ++i) {
1735 printf ("param %d: ", i);
1736 mono_print_ins (args [i]);
/* A row with no version flags is always usable; otherwise one supported version is enough. */
1739 if (result->simd_version_flags && !(result->simd_version_flags & simd_supported_versions)) {
1740 if (IS_DEBUG_ON (cfg)) {
1742 printf ("function %s::%s/%d requires one of unsuported SIMD instruction set(s): ", cmethod->klass->name, cmethod->name, fsig->param_count);
1743 for (x = 1; x <= SIMD_VERSION_INDEX_END; x++)
1744 if (result->simd_version_flags & (1 << x))
1745 printf ("%s ", simd_version_name (1 << x));
1752 switch (result->simd_emit_mode) {
1753 case SIMD_EMIT_BINARY:
1754 return simd_intrinsic_emit_binary (result, cfg, cmethod, args);
1755 case SIMD_EMIT_UNARY:
1756 return simd_intrinsic_emit_unary (result, cfg, cmethod, args);
1757 case SIMD_EMIT_SETTER:
1758 return simd_intrinsic_emit_setter (result, cfg, cmethod, args);
1759 case SIMD_EMIT_GETTER:
1760 return simd_intrinsic_emit_getter (result, cfg, cmethod, args);
1761 case SIMD_EMIT_GETTER_QWORD:
1762 return simd_intrinsic_emit_long_getter (result, cfg, cmethod, args);
1763 case SIMD_EMIT_CTOR:
1764 return simd_intrinsic_emit_ctor (result, cfg, cmethod, args);
1765 case SIMD_EMIT_CAST:
1766 return simd_intrinsic_emit_cast (result, cfg, cmethod, args);
1767 case SIMD_EMIT_SHUFFLE:
1768 return simd_intrinsic_emit_shuffle (result, cfg, cmethod, args);
1769 case SIMD_EMIT_SHIFT:
1770 return simd_intrinsic_emit_shift (result, cfg, cmethod, args);
1771 case SIMD_EMIT_EQUALITY:
1772 return simd_intrinsic_emit_equality (result, cfg, cmethod, args);
1773 case SIMD_EMIT_LOAD_ALIGNED:
1774 return simd_intrinsic_emit_load_aligned (result, cfg, cmethod, args);
1775 case SIMD_EMIT_STORE:
1776 return simd_intrinsic_emit_store (result, cfg, cmethod, args);
1777 case SIMD_EMIT_EXTRACT_MASK:
1778 return simd_intrinsic_emit_extract_mask (result, cfg, cmethod, args);
1779 case SIMD_EMIT_PREFETCH:
1780 return simd_intrinsic_emit_prefetch (result, cfg, cmethod, args);
1782 g_assert_not_reached ();
/*
 * Emits IR computing the address of element INDEX of array ARR for a 16-byte
 * vector access: arr + index * element_size + offsetof (MonoArray, vector).
 * On 64-bit targets the 32-bit index is sign-extended first.
 * Two bounds checks are emitted: one for INDEX and one for
 * INDEX + (16 / size - 1), the last element a 16-byte access touches.
 * NOTE(review): presumably both are emitted only when CHECK_BOUNDS is set;
 * confirm the guarding condition.
 */
1786 mono_emit_vector_ldelema (MonoCompile *cfg, MonoType *array_type, MonoInst *arr, MonoInst *index, gboolean check_bounds)
1790 int mult_reg, add_reg, array_reg, index_reg, index2_reg, index3_reg;
1792 size = mono_array_element_size (mono_class_from_mono_type (array_type));
1793 mult_reg = alloc_preg (cfg);
1794 array_reg = arr->dreg;
1795 index_reg = index->dreg;
1797 #if SIZEOF_VOID_P == 8
1798 /* The array reg is 64 bits but the index reg is only 32 */
1799 index2_reg = alloc_preg (cfg);
1800 MONO_EMIT_NEW_UNALU (cfg, OP_SEXT_I4, index2_reg, index_reg);
1802 index2_reg = index_reg;
1804 index3_reg = alloc_preg (cfg);
1807 MONO_EMIT_BOUNDS_CHECK (cfg, array_reg, MonoArray, max_length, index2_reg);
1808 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_PADD_IMM, index3_reg, index2_reg, 16 / size - 1);
1809 MONO_EMIT_BOUNDS_CHECK (cfg, array_reg, MonoArray, max_length, index3_reg);
1812 add_reg = alloc_preg (cfg);
/* address = array + index * size + offset of the first element */
1814 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_MUL_IMM, mult_reg, index2_reg, size);
1815 MONO_EMIT_NEW_BIALU (cfg, OP_PADD, add_reg, array_reg, mult_reg);
1816 NEW_BIALU_IMM (cfg, ins, OP_PADD_IMM, add_reg, add_reg, MONO_STRUCT_OFFSET (MonoArray, vector));
1817 ins->type = STACK_PTR;
1818 MONO_ADD_INS (cfg->cbb, ins);
/*
 * emit_array_extension_intrinsics:
 *
 *   Emit intrinsics for the Mono.Simd ArrayExtensions methods. Methods are
 * recognized by name and parameter count: GetVector/GetVectorAligned (2),
 * SetVector/SetVectorAligned (3) and IsAligned (2).
 */
1824 emit_array_extension_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
1826 if ((!strcmp ("GetVector", cmethod->name) || !strcmp ("GetVectorAligned", cmethod->name)) && fsig->param_count == 2) {
/* args [0] is the array, args [1] the element index; bounds checks are requested. */
1828 int addr = mono_emit_vector_ldelema (cfg, fsig->params [0], args [0], args [1], TRUE);
/* The Aligned variant selects the aligned load opcode. */
1830 MONO_INST_NEW (cfg, load, !strcmp ("GetVectorAligned", cmethod->name) ? OP_LOADX_ALIGNED_MEMBASE : OP_LOADX_MEMBASE );
1831 load->klass = cmethod->klass;
1833 load->type = STACK_VTYPE;
/* NOTE(review): other vector results in this file use alloc_xreg () for their dreg - confirm alloc_ireg () is intended here. */
1834 load->dreg = alloc_ireg (cfg);
1835 MONO_ADD_INS (cfg->cbb, load);
1839 if ((!strcmp ("SetVector", cmethod->name) || !strcmp ("SetVectorAligned", cmethod->name)) && fsig->param_count == 3) {
/* args [0] is the array, args [1] the vector value to store, args [2] the element index. */
1841 int vreg = get_simd_vreg (cfg, cmethod, args [1]);
1842 int addr = mono_emit_vector_ldelema (cfg, fsig->params [0], args [0], args [2], TRUE);
/*
 * NOTE(review): the aligned path uses a _MEMBASE_REG opcode while the unaligned
 * path uses OP_STOREX_MEMBASE; the TODO list at the top of the file mentions an
 * open question about these two opcodes - confirm this pairing is intended.
 */
1844 MONO_INST_NEW (cfg, store, !strcmp ("SetVectorAligned", cmethod->name) ? OP_STOREX_ALIGNED_MEMBASE_REG : OP_STOREX_MEMBASE);
1845 store->klass = cmethod->klass;
1847 store->sreg1 = vreg;
1848 MONO_ADD_INS (cfg->cbb, store);
1852 if (!strcmp ("IsAligned", cmethod->name) && fsig->param_count == 2) {
/* Only the address is needed here, so no bounds checks are requested. */
1854 int addr = mono_emit_vector_ldelema (cfg, fsig->params [0], args [0], args [1], FALSE);
/* Keep the low 4 bits of the address, compare them with 0 and materialize the comparison with OP_CEQ (16 byte alignment test). */
1856 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_AND_IMM, addr, addr, 15);
1857 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, addr, 0);
1858 NEW_UNALU (cfg, ins, OP_CEQ, addr, -1);
1859 MONO_ADD_INS (cfg->cbb, ins);
/*
 * emit_simd_runtime_intrinsics:
 *
 *   Emit intrinsics for the Mono.Simd SimdRuntime class. A parameterless
 * get_AccelMode is replaced by an integer constant holding the current value
 * of simd_supported_versions.
 */
1867 emit_simd_runtime_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
1869 if (!strcmp ("get_AccelMode", cmethod->name) && fsig->param_count == 0) {
1871 EMIT_NEW_ICONST (cfg, ins, simd_supported_versions);
/* Return whether the name of ASSEMBLY is exactly "System.Numerics". */
1878 is_sys_numerics_assembly (MonoAssembly *assembly)
1880 return !strcmp ("System.Numerics", assembly->aname.name);
/* Return whether the name of ASSEMBLY is exactly "System.Numerics.Vectors". */
1884 is_sys_numerics_vectors_assembly (MonoAssembly *assembly)
1886 return !strcmp ("System.Numerics.Vectors", assembly->aname.name);
/*
 * mono_emit_simd_intrinsics:
 *
 *   Dispatch CMETHOD to the matching intrinsic emitter. Methods from the
 * System.Numerics and System.Numerics.Vectors assemblies go to their own
 * emitters. Otherwise both the assembly name and the namespace of the
 * declaring class are compared with "Mono.Simd", and the class name selects
 * either a dedicated emitter (SimdRuntime, ArrayExtensions) or the per-type
 * table handed to emit_intrinsics. cfg->uses_simd_intrinsics is set before
 * the per-type table lookup.
 */
1890 mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
1892 const char *class_name;
1894 if (is_sys_numerics_assembly (cmethod->klass->image->assembly))
1895 return emit_sys_numerics_intrinsics (cfg, cmethod, fsig, args);
1897 if (is_sys_numerics_vectors_assembly (cmethod->klass->image->assembly))
1898 return emit_sys_numerics_vectors_intrinsics (cfg, cmethod, fsig, args);
/* strcmp () is non-zero on mismatch, so this tests for "not in the Mono.Simd assembly or not in the Mono.Simd namespace". */
1900 if (strcmp ("Mono.Simd", cmethod->klass->image->assembly->aname.name) ||
1901 strcmp ("Mono.Simd", cmethod->klass->name_space))
1904 class_name = cmethod->klass->name;
1905 if (!strcmp ("SimdRuntime", class_name))
1906 return emit_simd_runtime_intrinsics (cfg, cmethod, fsig, args);
1908 if (!strcmp ("ArrayExtensions", class_name))
1909 return emit_array_extension_intrinsics (cfg, cmethod, fsig, args);
/*
 * For VectorOperations the table is chosen from the class of the first
 * parameter instead of the declaring class; the method flags are tested for
 * METHOD_ATTRIBUTE_STATIC first. Any other class is tested for simd_type.
 */
1911 if (!strcmp ("VectorOperations", class_name)) {
1912 if (!(cmethod->flags & METHOD_ATTRIBUTE_STATIC))
1914 class_name = mono_class_from_mono_type (mono_method_signature (cmethod)->params [0])->name;
1915 } else if (!cmethod->klass->simd_type)
1918 cfg->uses_simd_intrinsics = 1;
1919 if (!strcmp ("Vector2d", class_name))
1920 return emit_intrinsics (cfg, cmethod, fsig, args, vector2d_intrinsics, sizeof (vector2d_intrinsics) / sizeof (SimdIntrinsic));
1921 if (!strcmp ("Vector4f", class_name))
1922 return emit_intrinsics (cfg, cmethod, fsig, args, vector4f_intrinsics, sizeof (vector4f_intrinsics) / sizeof (SimdIntrinsic));
1923 if (!strcmp ("Vector2ul", class_name))
1924 return emit_intrinsics (cfg, cmethod, fsig, args, vector2ul_intrinsics, sizeof (vector2ul_intrinsics) / sizeof (SimdIntrinsic));
1925 if (!strcmp ("Vector2l", class_name))
1926 return emit_intrinsics (cfg, cmethod, fsig, args, vector2l_intrinsics, sizeof (vector2l_intrinsics) / sizeof (SimdIntrinsic));
1927 if (!strcmp ("Vector4ui", class_name))
1928 return emit_intrinsics (cfg, cmethod, fsig, args, vector4ui_intrinsics, sizeof (vector4ui_intrinsics) / sizeof (SimdIntrinsic));
1929 if (!strcmp ("Vector4i", class_name))
1930 return emit_intrinsics (cfg, cmethod, fsig, args, vector4i_intrinsics, sizeof (vector4i_intrinsics) / sizeof (SimdIntrinsic));
1931 if (!strcmp ("Vector8us", class_name))
1932 return emit_intrinsics (cfg, cmethod, fsig, args, vector8us_intrinsics, sizeof (vector8us_intrinsics) / sizeof (SimdIntrinsic));
1933 if (!strcmp ("Vector8s", class_name))
1934 return emit_intrinsics (cfg, cmethod, fsig, args, vector8s_intrinsics, sizeof (vector8s_intrinsics) / sizeof (SimdIntrinsic));
1935 if (!strcmp ("Vector16b", class_name))
1936 return emit_intrinsics (cfg, cmethod, fsig, args, vector16b_intrinsics, sizeof (vector16b_intrinsics) / sizeof (SimdIntrinsic));
1937 if (!strcmp ("Vector16sb", class_name))
1938 return emit_intrinsics (cfg, cmethod, fsig, args, vector16sb_intrinsics, sizeof (vector16sb_intrinsics) / sizeof (SimdIntrinsic));
1943 // The entries should be ordered by name
1944 // System.Numerics.Vector2/Vector3/Vector4
/*
 * Lookup table used by emit_vector_intrinsics, which searches it with
 * mono_binary_search () and simd_intrinsic_compare_by_name - hence the
 * ordering requirement on the entries.
 * Each entry starts with the method name id and the opcode; the remaining
 * initializers (SIMD version, emit kind, comparison flag) presumably map to
 * further SimdIntrinsic fields - confirm against the struct definition.
 */
1945 static const SimdIntrinsic vector2_intrinsics[] = {
1946 { SN_ctor, OP_EXPAND_R4 },
1948 { SN_Dot, OP_DPPS },
1949 { SN_Equals, OP_COMPPS, SIMD_VERSION_SSE1, SIMD_EMIT_EQUALITY, SIMD_COMP_EQ },
1950 { SN_Max, OP_MAXPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
1951 { SN_Min, OP_MINPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
1952 { SN_SquareRoot, OP_SQRTPS, SIMD_VERSION_SSE1, SIMD_EMIT_UNARY },
1953 { SN_op_Addition, OP_ADDPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
1954 { SN_op_Division, OP_DIVPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
1955 { SN_op_Multiply, OP_MULPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
1956 { SN_op_Subtraction, OP_SUBPS, SIMD_VERSION_SSE1, SIMD_EMIT_BINARY },
/*
 * emit_vector_intrinsics:
 *
 *   Emit intrinsics for methods of the System.Numerics Vector2/Vector3/Vector4
 * classes (reached from emit_sys_numerics_intrinsics). The method is looked up
 * by name in vector2_intrinsics and each case checks the signature before
 * emitting anything.
 */
1960 emit_vector_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
1962 const SimdIntrinsic *intrins;
1963 MonoMethodSignature *sig = mono_method_signature (cmethod);
/* The vector type itself, used by the signature checks below. */
1964 MonoType *type = &cmethod->klass->byval_arg;
1967 * Vector2/3/4 are handled the same way, since the underlying SIMD type is the same (4 * r4).
1969 intrins = (const SimdIntrinsic*)mono_binary_search (cmethod->name, vector2_intrinsics, sizeof (vector2_intrinsics) / sizeof (SimdIntrinsic), sizeof (SimdIntrinsic), &simd_intrinsic_compare_by_name);
1971 //printf ("%s\n", mono_method_full_name (cmethod, 1));
1975 if (cfg->verbose_level > 1) {
1976 char *name = mono_method_full_name (cmethod, TRUE);
1977 printf (" SIMD intrinsic %s\n", name);
1981 switch (intrins->name) {
/* Constructor path: every parameter is inspected for MONO_TYPE_R4 ahead of the simd_intrinsic_emit_ctor call. */
1983 gboolean match = TRUE;
1984 for (int i = 0; i < fsig->param_count; ++i)
1985 if (fsig->params [i]->type != MONO_TYPE_R4)
1989 return simd_intrinsic_emit_ctor (intrins, cfg, cmethod, args);
/* Signature guard: one parameter of the vector type and a boolean return; emitted as an equality. */
1992 if (!(fsig->param_count == 1 && fsig->ret->type == MONO_TYPE_BOOLEAN && fsig->params [0] == type))
1994 return simd_intrinsic_emit_equality (intrins, cfg, cmethod, args);
/* Signature guard for the unary form: (vector) -> vector. */
1996 if (!(fsig->param_count == 1 && fsig->ret == type && fsig->params [0] == type))
1998 return simd_intrinsic_emit_unary (intrins, cfg, cmethod, args);
/* Signature guard: (vector, vector) -> R4; the binary op result is then read back from element 0. */
2000 if (!(fsig->param_count == 2 && fsig->ret->type == MONO_TYPE_R4 && fsig->params [0] == type && fsig->params [1] == type))
2002 if (COMPILE_LLVM (cfg)) {
2005 ins = simd_intrinsic_emit_binary (intrins, cfg, cmethod, args);
2006 /* The end result is in the lowest element */
2007 return simd_intrinsic_emit_getter_op (cfg, 0, cmethod->klass, mono_method_signature (cmethod)->ret, ins);
2011 // abs(x) = max(x, sub(0,x))
2015 if (!(fsig->param_count == 1 && fsig->ret == type && fsig->params [0] == type))
2018 MONO_INST_NEW (cfg, zero, OP_XZERO);
2019 zero->dreg = alloc_xreg (cfg);
2020 zero->klass = cmethod->klass;
2021 MONO_ADD_INS (cfg->cbb, zero);
/* sub = zero - x, result = max (x, sub) */
2023 sub = simd_intrinsic_emit_binary_op (cfg, OP_SUBPS, 0, cmethod->klass, sig->params [0], sig->params [0], zero, args [0]);
2024 return simd_intrinsic_emit_binary_op (cfg, OP_MAXPS, 0, cmethod->klass, sig->params [0], sig->params [0], args [0], sub);
/* Arithmetic operators: two parameters, vector return, and each operand may be either the vector type or an R4. */
2028 case SN_op_Addition:
2029 case SN_op_Division:
2030 case SN_op_Multiply:
2031 case SN_op_Subtraction:
2032 if (!(fsig->param_count == 2 && fsig->ret == type && (fsig->params [0] == type || fsig->params [0]->type == MONO_TYPE_R4) && (fsig->params [1] == type || fsig->params [1]->type == MONO_TYPE_R4)))
2034 return simd_intrinsic_emit_binary (intrins, cfg, cmethod, args);
/* Verbose-only diagnostic naming the method when it is reported as not handled. */
2039 if (cfg->verbose_level > 1) {
2040 char *name = mono_method_full_name (cmethod, TRUE);
2041 printf (" SIMD method %s not handled.\n", name);
/*
 * Lookup table used by emit_vector_t_intrinsics (class name "Vector`1"), which
 * searches it with mono_binary_search () and simd_intrinsic_compare_by_name.
 * Most entries carry only a method name id; get_AllOnes and get_Zero also name
 * the opcode that produces the constant vector.
 * NOTE(review): as with the other tables searched by name, the entries
 * presumably have to stay ordered by name - confirm.
 */
2047 static const SimdIntrinsic vector_t_intrinsics[] = {
2053 { SN_GreaterThanOrEqual },
2055 { SN_LessThanOrEqual },
2056 { SN_get_AllOnes, OP_XONES },
2059 { SN_get_Zero, OP_XZERO },
2064 { SN_op_Subtraction }
/*
 * emit_vector_t_intrinsics:
 *
 *   Emit intrinsics for methods of the generic System.Numerics Vector`1 class.
 * The method is looked up by name in vector_t_intrinsics, the element type is
 * taken from the generic instantiation of the declaring class, and each case
 * checks the signature before emitting anything.
 */
2068 emit_vector_t_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2070 const SimdIntrinsic *intrins;
2071 MonoType *type, *etype;
2073 int size, len, index;
2075 intrins = (const SimdIntrinsic*)mono_binary_search (cmethod->name, vector_t_intrinsics, sizeof (vector_t_intrinsics) / sizeof (SimdIntrinsic), sizeof (SimdIntrinsic), &simd_intrinsic_compare_by_name);
2077 //printf ("%s\n", mono_method_full_name (cmethod, 1));
/* type: the vector type itself; etype: its first generic argument (the element type); size: value size of one element. */
2081 type = &cmethod->klass->byval_arg;
2082 etype = mono_class_get_context (cmethod->klass)->class_inst->type_argv [0];
2083 size = mono_class_value_size (mono_class_from_mono_type (etype), NULL);
/* Check that the element type is a primitive type. */
2087 if (!MONO_TYPE_IS_PRIMITIVE (etype))
2090 if (cfg->verbose_level > 1) {
2091 char *name = mono_method_full_name (cmethod, TRUE);
2092 printf (" SIMD intrinsic %s\n", name);
2096 switch (intrins->name) {
/* Signature guard: no parameters and an I4 return; the value emitted is the constant len (presumably the number of elements per vector - confirm). */
2098 if (!(fsig->param_count == 0 && fsig->ret->type == MONO_TYPE_I4))
2100 EMIT_NEW_ICONST (cfg, ins, len);
/* Constant vectors: no parameters, vector return. */
2102 case SN_get_AllOnes:
2104 if (!(fsig->param_count == 0 && mono_metadata_type_equal (fsig->ret, type)))
2106 return simd_intrinsic_emit_const (intrins, cfg, cmethod, args);
/* Indexer: args [1] has to be an OP_ICONST whose value lies in [0, len) for the element getter to be emitted. */
2108 g_assert (fsig->param_count == 1);
2109 if (args [1]->opcode != OP_ICONST)
2111 index = args [1]->inst_c0;
2112 if (index < 0 || index >= len)
2114 return simd_intrinsic_emit_getter_op (cfg, index, cmethod->klass, etype, args [0]);
/* Constructor taking a single value of the element type. */
2116 if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype))
2117 return simd_intrinsic_emit_ctor (NULL, cfg, cmethod, args);
/* Constructor taking an array, with or without a start index. */
2118 if ((fsig->param_count == 1 || fsig->param_count == 2) && (fsig->params [0]->type == MONO_TYPE_SZARRAY)) {
2119 MonoInst *array_ins = args [1];
2120 MonoInst *index_ins;
2121 MonoInst *ldelema_ins;
/* args [0] is tested for being an OP_LDADDR; the variable it refers to is the destination of the load below. */
2125 if (args [0]->opcode != OP_LDADDR)
2128 /* .ctor (T[]) or .ctor (T[], index) */
2130 if (fsig->param_count == 2) {
2131 index_ins = args [2];
2133 EMIT_NEW_ICONST (cfg, index_ins, 0);
2136 /* Emit index check for the end (index + len - 1 < array length) */
2137 end_index_reg = alloc_ireg (cfg);
2138 EMIT_NEW_BIALU_IMM (cfg, ins, OP_IADD_IMM, end_index_reg, index_ins->dreg, len - 1);
2139 MONO_EMIT_BOUNDS_CHECK (cfg, array_ins->dreg, MonoArray, max_length, end_index_reg);
2141 /* Load the array slice into the simd reg */
2142 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type (etype), array_ins, index_ins, TRUE);
2143 g_assert (args [0]->opcode == OP_LDADDR);
2144 var = args [0]->inst_p0;
/* Load from the computed element address into the dreg of the destination variable. */
2145 EMIT_NEW_LOAD_MEMBASE (cfg, ins, OP_LOADX_MEMBASE, var->dreg, ldelema_ins->dreg, 0);
2146 ins->klass = cmethod->klass;
2150 case SN_op_Explicit:
2151 return simd_intrinsic_emit_cast (intrins, cfg, cmethod, args);
/* One-operand form: compares against the single argument and yields a boolean. */
2153 if (fsig->param_count == 1 && fsig->ret->type == MONO_TYPE_BOOLEAN && mono_metadata_type_equal (fsig->params [0], type))
2154 return simd_intrinsic_emit_equality_op (cfg, cmethod, args, type_to_comp_op (etype), SIMD_COMP_EQ);
/*
 * Two-operand form: an element-wise compare emitted with
 * simd_intrinsic_emit_binary_op, whose result is itself a vector. The return
 * type therefore has to be the vector type; a boolean return is only valid
 * for the one-operand form above, which uses the equality emitter.
 */
2155 if (fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, type) && mono_metadata_type_equal (fsig->params [0], type) && mono_metadata_type_equal (fsig->params [1], type))
2156 return simd_intrinsic_emit_binary_op (cfg, type_to_comp_op (etype), 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]);
/* Ordered comparisons, built from the "equal" and "greater than" compare opcodes for the element type. */
2159 case SN_GreaterThan:
2160 case SN_GreaterThanOrEqual:
2162 MonoInst *cmp1, *cmp2;
2165 switch (etype->type) {
2175 eq_op = type_to_comp_op (etype);
2176 gt_op = type_to_gt_op (etype);
2178 switch (intrins->name) {
2179 case SN_GreaterThan:
2180 return simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]);
/* Same "greater than" opcode with the operands swapped. */
2182 return simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [1], args [0]);
/* The "or equal" forms OR together an equality mask and a greater-than mask. */
2183 case SN_LessThanOrEqual:
2184 cmp1 = simd_intrinsic_emit_binary_op (cfg, eq_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [1], args [0]);
2185 cmp2 = simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [1], args [0]);
2186 return simd_intrinsic_emit_binary_op (cfg, OP_POR, 0, cmethod->klass, fsig->params [0], fsig->params [1], cmp1, cmp2);
2187 case SN_GreaterThanOrEqual:
2188 cmp1 = simd_intrinsic_emit_binary_op (cfg, eq_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]);
2189 cmp2 = simd_intrinsic_emit_binary_op (cfg, gt_op, 0, cmethod->klass, fsig->params [0], fsig->params [1], args [0], args [1]);
2190 return simd_intrinsic_emit_binary_op (cfg, OP_POR, 0, cmethod->klass, fsig->params [0], fsig->params [1], cmp1, cmp2);
2192 g_assert_not_reached ();
2198 switch (etype->type) {
2202 case MONO_TYPE_U8: {
/* Plain copy of args [0] into a fresh xreg. */
2206 MONO_INST_NEW (cfg, ins, OP_XMOVE);
2207 ins->klass = cmethod->klass;
2208 ins->type = STACK_VTYPE;
2209 ins->sreg1 = args [0]->dreg;
2210 ins->dreg = alloc_xreg (cfg);
2211 MONO_ADD_INS (cfg->cbb, ins);
2218 case SN_op_Addition:
2219 case SN_op_Subtraction:
2220 case SN_op_Multiply:
2221 case SN_op_Division: {
/* Both operands and the return value must have the same type; the opcode is then picked from the element type. */
2222 if (!(fsig->param_count == 2 && mono_metadata_type_equal (fsig->ret, fsig->params [0]) && mono_metadata_type_equal (fsig->params [0], fsig->params [1])))
2225 switch (intrins->name) {
2226 case SN_op_Addition:
2227 op = type_to_padd_op (etype);
2229 case SN_op_Subtraction:
2230 op = type_to_psub_op (etype);
2232 case SN_op_Multiply:
2233 op = type_to_pmul_op (etype);
2235 case SN_op_Division:
2236 op = type_to_pdiv_op (etype);
2239 g_assert_not_reached ();
2242 return simd_intrinsic_emit_binary_op (cfg, op, 0, cmethod->klass, fsig->params [0], fsig->params [0], args [0], args [1]);
/* Store path: args [1] is the destination array and args [2] the start index. */
2246 MonoInst *array_ins = args [1];
2247 MonoInst *index_ins = args [2];
2248 MonoInst *ldelema_ins;
2252 if (args [0]->opcode != OP_LDADDR)
2255 /* Emit index check for the end (index + len - 1 < array length) */
2256 end_index_reg = alloc_ireg (cfg);
2257 EMIT_NEW_BIALU_IMM (cfg, ins, OP_IADD_IMM, end_index_reg, index_ins->dreg, len - 1);
/*
 * Explicit length check raising ArgumentException, instead of the
 * MONO_EMIT_BOUNDS_CHECK used on the array constructor path. The compare is
 * unsigned (LE_UN) between the array length and the end index.
 */
2259 int length_reg = alloc_ireg (cfg);
2260 MONO_EMIT_NEW_LOAD_MEMBASE_OP_FAULT (cfg, OP_LOADI4_MEMBASE, length_reg, array_ins->dreg, MONO_STRUCT_OFFSET (MonoArray, max_length));
2261 MONO_EMIT_NEW_BIALU (cfg, OP_COMPARE, -1, length_reg, end_index_reg);
2262 MONO_EMIT_NEW_COND_EXC (cfg, LE_UN, "ArgumentException");
2264 /* Store the simd reg into the array slice */
2265 ldelema_ins = mini_emit_ldelema_1_ins (cfg, mono_class_from_mono_type (etype), array_ins, index_ins, TRUE);
2266 g_assert (args [0]->opcode == OP_LDADDR);
2267 var = args [0]->inst_p0;
2268 EMIT_NEW_STORE_MEMBASE (cfg, ins, OP_STOREX_MEMBASE, ldelema_ins->dreg, 0, var->dreg);
2269 ins->klass = cmethod->klass;
/* Verbose-only diagnostic naming the method when it is reported as not handled. */
2277 if (cfg->verbose_level > 1) {
2278 char *name = mono_method_full_name (cmethod, TRUE);
2279 printf (" SIMD method %s not handled.\n", name);
2287 * emit_sys_numerics_intrinsics:
2289 * Emit intrinsics for the System.Numerics assembly.
/* Dispatches on the name of the declaring class of CMETHOD. */
2292 emit_sys_numerics_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2294 const char *nspace = cmethod->klass->name_space;
2295 const char *class_name = cmethod->klass->name;
/* The fixed-size float vectors share one emitter. */
2297 if (!strcmp ("Vector2", class_name) || !strcmp ("Vector4", class_name) || !strcmp ("Vector3", class_name))
2298 return emit_vector_intrinsics (cfg, cmethod, fsig, args);
/* "Vector`1" is the metadata name of the generic vector class. */
2300 if (!strcmp ("Vector`1", class_name))
2301 return emit_vector_t_intrinsics (cfg, cmethod, fsig, args);
/* For class Vector in namespace System.Numerics, get_IsHardwareAccelerated becomes a constant. */
2303 if (!strcmp ("System.Numerics", nspace) && !strcmp ("Vector", class_name)) {
2304 if (!strcmp (cmethod->name, "get_IsHardwareAccelerated")) {
/* The constant is 1 when simd_supported_versions is non-zero and 0 otherwise. */
2307 if (simd_supported_versions)
2308 EMIT_NEW_ICONST (cfg, ins, 1);
2310 EMIT_NEW_ICONST (cfg, ins, 0);
2311 ins->type = STACK_I4;
/*
 * emit_sys_numerics_vectors_intrinsics:
 *
 *   Emit intrinsics for the System.Numerics.Vectors assembly. Methods of the
 * class named "Vector`1" are forwarded to emit_vector_t_intrinsics.
 */
2320 emit_sys_numerics_vectors_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
2322 const char *class_name = cmethod->klass->name;
2324 if (!strcmp (class_name, "Vector`1"))
2325 return emit_vector_t_intrinsics (cfg, cmethod, fsig, args);
/*
 * mono_emit_simd_field_load:
 *
 *   Emit a load of FIELD from the vector at ADDR as a SIMD element getter.
 * Only fields whose parent class lives in the System.Numerics assembly and is
 * named Vector2, Vector3 or Vector4 are considered.
 */
2330 mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr)
2332 if (is_sys_numerics_assembly (field->parent->image->assembly)) {
2335 if (!strcmp (field->parent->name, "Vector2") ||
2336 !strcmp (field->parent->name, "Vector3") ||
2337 !strcmp (field->parent->name, "Vector4")) {
/* The field name selects the element index (presumably X, Y, Z, W map to 0..3 in that order - confirm). */
2338 if (!strcmp (field->name, "X"))
2340 else if (!strcmp (field->name, "Y"))
2342 else if (!strcmp (field->name, "Z"))
2344 else if (!strcmp (field->name, "W"))
2349 if (cfg->verbose_level > 1)
2350 printf (" SIMD intrinsic field access: %s\n", field->name);
/* The getter is typed with the field's own type and the parent class of the field. */
2352 return simd_intrinsic_emit_getter_op (cfg, index, field->parent, mono_field_get_type (field), addr);
2358 #endif /* DISABLE_JIT */
/* Second definition of mono_emit_simd_field_load; NOTE(review): presumably the fallback compiled when MONO_ARCH_SIMD_INTRINSICS is not defined - confirm against the enclosing preprocessor conditionals. */
2363 mono_emit_simd_field_load (MonoCompile *cfg, MonoClassField *field, MonoInst *addr)
2368 #endif /* MONO_ARCH_SIMD_INTRINSICS */