Tue Aug 21 16:40:04 CEST 2007 Paolo Molaro <lupus@ximian.com>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #ifdef HAVE_UNISTD_H
15 #include <unistd.h>
16 #endif
17
18 #include <mono/metadata/appdomain.h>
19 #include <mono/metadata/debug-helpers.h>
20 #include <mono/metadata/threads.h>
21 #include <mono/metadata/profiler-private.h>
22 #include <mono/utils/mono-math.h>
23
24 #include "trace.h"
25 #include "mini-x86.h"
26 #include "inssel.h"
27 #include "cpu-x86.h"
28
29 /*
 * Thread-local-storage locations used by this backend.
 * On windows, these hold the key returned by TlsAlloc ().
 * All four start out as -1.
 * NOTE(review): -1 presumably means "not resolved yet / unavailable" -
 * confirm against the code that assigns them.
 */
30 static gint lmf_tls_offset = -1;
31 static gint lmf_addr_tls_offset = -1;
32 static gint appdomain_tls_offset = -1;
33 static gint thread_tls_offset = -1;
34
/*
 * optimize_for_xen is a variable defaulting to TRUE when MONO_XEN_OPT is
 * defined at build time, and the constant 0 otherwise.
 */
35 #ifdef MONO_XEN_OPT
36 static gboolean optimize_for_xen = TRUE;
37 #else
38 #define optimize_for_xen 0
39 #endif
40
/*
 * is_win32 is always a variable; its value records whether PLATFORM_WIN32
 * was defined at build time.
 */
41 #ifdef PLATFORM_WIN32
42 static gboolean is_win32 = TRUE;
43 #else
44 static gboolean is_win32 = FALSE;
45 #endif
46
/*
 * ALIGN_TO:
 * @val: the value to round up
 * @align: the alignment, which must be a power of two
 *
 * Rounds VAL up to the next multiple of ALIGN. The result has type guint64.
 * Both arguments are parenthesized before use, and the mask is computed in
 * 64 bits so that a 32-bit unsigned ALIGN (such as a sizeof expression on
 * x86) cannot clear the upper half of the result.
 */
#define ALIGN_TO(val,align) ((((guint64)(val)) + ((guint64)(align) - 1)) & ~((guint64)(align) - 1))
48
/*
 * Added to an ArgInfo offset to obtain the frame-register-relative offset of
 * an incoming argument (see mono_arch_allocate_vars).
 * NOTE(review): 8 presumably accounts for the saved frame pointer and the
 * return address - confirm.
 */
49 #define ARGS_OFFSET 8
50
/*
 * CALLCONV_IS_STDCALL(sig): non-zero when SIG uses the stdcall convention.
 */
51 #ifdef PLATFORM_WIN32
52 /* Under windows, the default pinvoke calling convention is stdcall, so a pinvoke signature with MONO_CALL_DEFAULT counts as stdcall too */
53 #define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
54 #else
/* Elsewhere only an explicit MONO_CALL_STDCALL counts */
55 #define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
56 #endif
57
/* Marks code paths that are not implemented: expands to an assertion failure */
58 #define NOT_IMPLEMENTED g_assert_not_reached ()
59
60 const char*
61 mono_arch_regname (int reg) {
62         switch (reg) {
63         case X86_EAX: return "%eax";
64         case X86_EBX: return "%ebx";
65         case X86_ECX: return "%ecx";
66         case X86_EDX: return "%edx";
67         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
68         case X86_EDI: return "%edi";
69         case X86_ESI: return "%esi";
70         }
71         return "unknown";
72 }
73
/*
 * mono_arch_fregname:
 * @reg: a floating point register number (ignored)
 *
 * No floating point register names are provided by this backend, so the
 * result is always the static string "unknown".
 */
const char*
mono_arch_fregname (int reg)
{
	(void) reg;

	return "unknown";
}
78
/*
 * ArgStorage:
 * Where an argument or a return value lives for a given call.
 */
typedef enum {
	/* In the integer register named by ArgInfo.reg */
	ArgInIReg = 0,
	/* In a float register holding a single precision value (see add_float) */
	ArgInFloatSSEReg = 1,
	/* In a float register holding a double precision value (see add_float) */
	ArgInDoubleSSEReg = 2,
	/* On the stack, at ArgInfo.offset */
	ArgOnStack = 3,
	/* A valuetype returned in registers, described by ArgInfo.pair_storage/pair_regs */
	ArgValuetypeInReg = 4,
	/* Single precision value on the floating point stack (used for R4 results) */
	ArgOnFloatFpStack = 5,
	/* Double precision value on the floating point stack (used for R8 results) */
	ArgOnDoubleFpStack = 6,
	/* Nothing: void result, or an unused half of a register pair */
	ArgNone = 7
} ArgStorage;
89
90 typedef struct {
91         gint16 offset;
92         gint8  reg;
93         ArgStorage storage;
94
95         /* Only if storage == ArgValuetypeInReg */
96         ArgStorage pair_storage [2];
97         gint8 pair_regs [2];
98 } ArgInfo;
99
100 typedef struct {
101         int nargs;
102         guint32 stack_usage;
103         guint32 reg_usage;
104         guint32 freg_usage;
105         gboolean need_stack_align;
106         guint32 stack_align_amount;
107         ArgInfo ret;
108         ArgInfo sig_cookie;
109         ArgInfo args [1];
110 } CallInfo;
111
/* Number of integer registers used for argument passing: none, everything goes on the stack */
112 #define PARAM_REGS 0
113
/* Number of float registers used for argument passing: none */
114 #define FLOAT_PARAM_REGS 0
115
/* Indexed by add_general () only when the register counter is below PARAM_REGS, which cannot happen while PARAM_REGS is 0 */
116 static X86_Reg_No param_regs [] = { 0 };
117
/*
 * On these platforms add_valuetype () returns pinvoke structs of 1, 2, 4 or
 * 8 bytes in registers; return_regs lists the low and high register used.
 */
118 #if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
119 #define SMALL_STRUCTS_IN_REGS
120 static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
121 #endif
122
123 static void inline
124 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
125 {
126     ainfo->offset = *stack_size;
127
128     if (*gr >= PARAM_REGS) {
129                 ainfo->storage = ArgOnStack;
130                 (*stack_size) += sizeof (gpointer);
131     }
132     else {
133                 ainfo->storage = ArgInIReg;
134                 ainfo->reg = param_regs [*gr];
135                 (*gr) ++;
136     }
137 }
138
139 static void inline
140 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
141 {
142         ainfo->offset = *stack_size;
143
144         g_assert (PARAM_REGS == 0);
145         
146         ainfo->storage = ArgOnStack;
147         (*stack_size) += sizeof (gpointer) * 2;
148 }
149
150 static void inline
151 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
152 {
153     ainfo->offset = *stack_size;
154
155     if (*gr >= FLOAT_PARAM_REGS) {
156                 ainfo->storage = ArgOnStack;
157                 (*stack_size) += is_double ? 8 : 4;
158     }
159     else {
160                 /* A double register */
161                 if (is_double)
162                         ainfo->storage = ArgInDoubleSSEReg;
163                 else
164                         ainfo->storage = ArgInFloatSSEReg;
165                 ainfo->reg = *gr;
166                 (*gr) += 1;
167     }
168 }
169
170
171 static void
172 add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
173                gboolean is_return,
174                guint32 *gr, guint32 *fr, guint32 *stack_size)
175 {
176         guint32 size;
177         MonoClass *klass;
178
179         klass = mono_class_from_mono_type (type);
180         if (sig->pinvoke) 
181                 size = mono_type_native_stack_size (&klass->byval_arg, NULL);
182         else 
183                 size = mono_type_stack_size (&klass->byval_arg, NULL);
184
185 #ifdef SMALL_STRUCTS_IN_REGS
186         if (sig->pinvoke && is_return) {
187                 MonoMarshalType *info;
188
189                 /*
190                  * the exact rules are not very well documented, the code below seems to work with the 
191                  * code generated by gcc 3.3.3 -mno-cygwin.
192                  */
193                 info = mono_marshal_load_type_info (klass);
194                 g_assert (info);
195
196                 ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
197
198                 /* Special case structs with only a float member */
199                 if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
200                         ainfo->storage = ArgValuetypeInReg;
201                         ainfo->pair_storage [0] = ArgOnDoubleFpStack;
202                         return;
203                 }
204                 if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
205                         ainfo->storage = ArgValuetypeInReg;
206                         ainfo->pair_storage [0] = ArgOnFloatFpStack;
207                         return;
208                 }               
209                 if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
210                         ainfo->storage = ArgValuetypeInReg;
211                         ainfo->pair_storage [0] = ArgInIReg;
212                         ainfo->pair_regs [0] = return_regs [0];
213                         if (info->native_size > 4) {
214                                 ainfo->pair_storage [1] = ArgInIReg;
215                                 ainfo->pair_regs [1] = return_regs [1];
216                         }
217                         return;
218                 }
219         }
220 #endif
221
222         ainfo->offset = *stack_size;
223         ainfo->storage = ArgOnStack;
224         *stack_size += ALIGN_TO (size, sizeof (gpointer));
225 }
226
227 /*
228  * get_call_info:
229  *
230  *  Obtain information about a call according to the calling convention.
231  * For x86 ELF, see the "System V Application Binary Interface Intel386 
232  * Architecture Processor Supplment, Fourth Edition" document for more
233  * information.
234  * For x86 win32, see ???.
235  */
236 static CallInfo*
237 get_call_info (MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
238 {
239         guint32 i, gr, fr;
240         MonoType *ret_type;
241         int n = sig->hasthis + sig->param_count;
242         guint32 stack_size = 0;
243         CallInfo *cinfo;
244
245         if (mp)
246                 cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
247         else
248                 cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
249
250         gr = 0;
251         fr = 0;
252
253         /* return value */
254         {
255                 ret_type = mono_type_get_underlying_type (sig->ret);
256                 switch (ret_type->type) {
257                 case MONO_TYPE_BOOLEAN:
258                 case MONO_TYPE_I1:
259                 case MONO_TYPE_U1:
260                 case MONO_TYPE_I2:
261                 case MONO_TYPE_U2:
262                 case MONO_TYPE_CHAR:
263                 case MONO_TYPE_I4:
264                 case MONO_TYPE_U4:
265                 case MONO_TYPE_I:
266                 case MONO_TYPE_U:
267                 case MONO_TYPE_PTR:
268                 case MONO_TYPE_FNPTR:
269                 case MONO_TYPE_CLASS:
270                 case MONO_TYPE_OBJECT:
271                 case MONO_TYPE_SZARRAY:
272                 case MONO_TYPE_ARRAY:
273                 case MONO_TYPE_STRING:
274                         cinfo->ret.storage = ArgInIReg;
275                         cinfo->ret.reg = X86_EAX;
276                         break;
277                 case MONO_TYPE_U8:
278                 case MONO_TYPE_I8:
279                         cinfo->ret.storage = ArgInIReg;
280                         cinfo->ret.reg = X86_EAX;
281                         break;
282                 case MONO_TYPE_R4:
283                         cinfo->ret.storage = ArgOnFloatFpStack;
284                         break;
285                 case MONO_TYPE_R8:
286                         cinfo->ret.storage = ArgOnDoubleFpStack;
287                         break;
288                 case MONO_TYPE_GENERICINST:
289                         if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
290                                 cinfo->ret.storage = ArgInIReg;
291                                 cinfo->ret.reg = X86_EAX;
292                                 break;
293                         }
294                         /* Fall through */
295                 case MONO_TYPE_VALUETYPE: {
296                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
297
298                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
299                         if (cinfo->ret.storage == ArgOnStack)
300                                 /* The caller passes the address where the value is stored */
301                                 add_general (&gr, &stack_size, &cinfo->ret);
302                         break;
303                 }
304                 case MONO_TYPE_TYPEDBYREF:
305                         /* Same as a valuetype with size 24 */
306                         add_general (&gr, &stack_size, &cinfo->ret);
307                         ;
308                         break;
309                 case MONO_TYPE_VOID:
310                         cinfo->ret.storage = ArgNone;
311                         break;
312                 default:
313                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
314                 }
315         }
316
317         /* this */
318         if (sig->hasthis)
319                 add_general (&gr, &stack_size, cinfo->args + 0);
320
321         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
322                 gr = PARAM_REGS;
323                 fr = FLOAT_PARAM_REGS;
324                 
325                 /* Emit the signature cookie just before the implicit arguments */
326                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
327         }
328
329         for (i = 0; i < sig->param_count; ++i) {
330                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
331                 MonoType *ptype;
332
333                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
334                         /* We allways pass the sig cookie on the stack for simplicity */
335                         /* 
336                          * Prevent implicit arguments + the sig cookie from being passed 
337                          * in registers.
338                          */
339                         gr = PARAM_REGS;
340                         fr = FLOAT_PARAM_REGS;
341
342                         /* Emit the signature cookie just before the implicit arguments */
343                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
344                 }
345
346                 if (sig->params [i]->byref) {
347                         add_general (&gr, &stack_size, ainfo);
348                         continue;
349                 }
350                 ptype = mono_type_get_underlying_type (sig->params [i]);
351                 switch (ptype->type) {
352                 case MONO_TYPE_BOOLEAN:
353                 case MONO_TYPE_I1:
354                 case MONO_TYPE_U1:
355                         add_general (&gr, &stack_size, ainfo);
356                         break;
357                 case MONO_TYPE_I2:
358                 case MONO_TYPE_U2:
359                 case MONO_TYPE_CHAR:
360                         add_general (&gr, &stack_size, ainfo);
361                         break;
362                 case MONO_TYPE_I4:
363                 case MONO_TYPE_U4:
364                         add_general (&gr, &stack_size, ainfo);
365                         break;
366                 case MONO_TYPE_I:
367                 case MONO_TYPE_U:
368                 case MONO_TYPE_PTR:
369                 case MONO_TYPE_FNPTR:
370                 case MONO_TYPE_CLASS:
371                 case MONO_TYPE_OBJECT:
372                 case MONO_TYPE_STRING:
373                 case MONO_TYPE_SZARRAY:
374                 case MONO_TYPE_ARRAY:
375                         add_general (&gr, &stack_size, ainfo);
376                         break;
377                 case MONO_TYPE_GENERICINST:
378                         if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
379                                 add_general (&gr, &stack_size, ainfo);
380                                 break;
381                         }
382                         /* Fall through */
383                 case MONO_TYPE_VALUETYPE:
384                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
385                         break;
386                 case MONO_TYPE_TYPEDBYREF:
387                         stack_size += sizeof (MonoTypedRef);
388                         ainfo->storage = ArgOnStack;
389                         break;
390                 case MONO_TYPE_U8:
391                 case MONO_TYPE_I8:
392                         add_general_pair (&gr, &stack_size, ainfo);
393                         break;
394                 case MONO_TYPE_R4:
395                         add_float (&fr, &stack_size, ainfo, FALSE);
396                         break;
397                 case MONO_TYPE_R8:
398                         add_float (&fr, &stack_size, ainfo, TRUE);
399                         break;
400                 default:
401                         g_error ("unexpected type 0x%x", ptype->type);
402                         g_assert_not_reached ();
403                 }
404         }
405
406         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
407                 gr = PARAM_REGS;
408                 fr = FLOAT_PARAM_REGS;
409                 
410                 /* Emit the signature cookie just before the implicit arguments */
411                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
412         }
413
414 #if defined(__APPLE__)
415         if ((stack_size % 16) != 0) { 
416                 cinfo->need_stack_align = TRUE;
417                 stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
418         }
419 #endif
420
421         cinfo->stack_usage = stack_size;
422         cinfo->reg_usage = gr;
423         cinfo->freg_usage = fr;
424         return cinfo;
425 }
426
427 /*
428  * mono_arch_get_argument_info:
429  * @csig:  a method signature
430  * @param_count: the number of parameters to consider
431  * @arg_info: an array to store the result infos
432  *
433  * Gathers information on parameters such as size, alignment and
434  * padding. arg_info should be large enought to hold param_count + 1 entries. 
435  *
436  * Returns the size of the activation frame.
437  */
438 int
439 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
440 {
441         int k, frame_size = 0;
442         int size, pad;
443         guint32 align;
444         int offset = 8;
445         CallInfo *cinfo;
446
447         cinfo = get_call_info (NULL, csig, FALSE);
448
449         if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
450                 frame_size += sizeof (gpointer);
451                 offset += 4;
452         }
453
454         arg_info [0].offset = offset;
455
456         if (csig->hasthis) {
457                 frame_size += sizeof (gpointer);
458                 offset += 4;
459         }
460
461         arg_info [0].size = frame_size;
462
463         for (k = 0; k < param_count; k++) {
464                 
465                 if (csig->pinvoke)
466                         size = mono_type_native_stack_size (csig->params [k], &align);
467                 else {
468                         int ialign;
469                         size = mono_type_stack_size (csig->params [k], &ialign);
470                         align = ialign;
471                 }
472
473                 /* ignore alignment for now */
474                 align = 1;
475
476                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
477                 arg_info [k].pad = pad;
478                 frame_size += size;
479                 arg_info [k + 1].pad = 0;
480                 arg_info [k + 1].size = size;
481                 offset += pad;
482                 arg_info [k + 1].offset = offset;
483                 offset += size;
484         }
485
486         align = MONO_ARCH_FRAME_ALIGNMENT;
487         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
488         arg_info [k].pad = pad;
489
490         g_free (cinfo);
491
492         return frame_size;
493 }
494
/*
 * cpuid_impl:
 * Raw x86 machine code of a function with the CpuidFunc signature. It takes
 * its five arguments from the stack (id at 0x8(%ebp), p_eax at 0xc, p_ebx at
 * 0x10, p_ecx at 0x14, p_edx at 0x18), executes CPUID with %eax = id and
 * stores the four result registers through the pointers. %ebx is saved on
 * entry and restored before returning. cpuid () copies these bytes into
 * memory obtained from mono_global_codeman_reserve () before calling them.
 */
495 static const guchar cpuid_impl [] = {
496         0x55,                           /* push   %ebp */
497         0x89, 0xe5,                     /* mov    %esp,%ebp */
498         0x53,                           /* push   %ebx              (preserve ebx) */
499         0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax    (eax = id) */
500         0x0f, 0xa2,                     /* cpuid   */
501         0x50,                           /* push   %eax              (keep the eax result, eax is reused as a pointer) */
502         0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax   (eax = p_ebx) */
503         0x89, 0x18,                     /* mov    %ebx,(%eax) */
504         0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax   (eax = p_ecx) */
505         0x89, 0x08,                     /* mov    %ecx,(%eax) */
506         0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax   (eax = p_edx) */
507         0x89, 0x10,                     /* mov    %edx,(%eax) */
508         0x58,                           /* pop    %eax              (the eax result again) */
509         0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx    (edx = p_eax) */
510         0x89, 0x02,                     /* mov    %eax,(%edx) */
511         0x5b,                           /* pop    %ebx */
512         0xc9,                           /* leave   */
513         0xc3,                           /* ret     */
514 };
515
/* Signature of the code in cpuid_impl: query leaf ID, store the four result registers through the pointers */
516 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
517
518 static int 
519 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
520 {
521         int have_cpuid = 0;
522 #ifndef _MSC_VER
523         __asm__  __volatile__ (
524                 "pushfl\n"
525                 "popl %%eax\n"
526                 "movl %%eax, %%edx\n"
527                 "xorl $0x200000, %%eax\n"
528                 "pushl %%eax\n"
529                 "popfl\n"
530                 "pushfl\n"
531                 "popl %%eax\n"
532                 "xorl %%edx, %%eax\n"
533                 "andl $0x200000, %%eax\n"
534                 "movl %%eax, %0"
535                 : "=r" (have_cpuid)
536                 :
537                 : "%eax", "%edx"
538         );
539 #else
540         __asm {
541                 pushfd
542                 pop eax
543                 mov edx, eax
544                 xor eax, 0x200000
545                 push eax
546                 popfd
547                 pushfd
548                 pop eax
549                 xor eax, edx
550                 and eax, 0x200000
551                 mov have_cpuid, eax
552         }
553 #endif
554         if (have_cpuid) {
555                 /* Have to use the code manager to get around WinXP DEP */
556                 static CpuidFunc func = NULL;
557                 void *ptr;
558                 if (!func) {
559                         ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
560                         memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
561                         func = (CpuidFunc)ptr;
562                 }
563                 func (id, p_eax, p_ebx, p_ecx, p_edx);
564
565                 /*
566                  * We use this approach because of issues with gcc and pic code, see:
567                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
568                 __asm__ __volatile__ ("cpuid"
569                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
570                         : "a" (id));
571                 */
572                 return 1;
573         }
574         return 0;
575 }
576
577 /*
578  * Initialize the cpu to execute managed code.
579  */
580 void
581 mono_arch_cpu_init (void)
582 {
583         /* spec compliance requires running with double precision */
584 #ifndef _MSC_VER
585         guint16 fpcw;
586
587         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
588         fpcw &= ~X86_FPCW_PRECC_MASK;
589         fpcw |= X86_FPCW_PREC_DOUBLE;
590         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
591         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
592 #else
593         _control87 (_PC_53, MCW_PC);
594 #endif
595 }
596
597 /*
598  * This function returns the optimizations supported on this cpu.
599  */
600 guint32
601 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
602 {
603         int eax, ebx, ecx, edx;
604         guint32 opts = 0;
605         
606         *exclude_mask = 0;
607         /* Feature Flags function, flags returned in EDX. */
608         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
609                 if (edx & (1 << 15)) {
610                         opts |= MONO_OPT_CMOV;
611                         if (edx & 1)
612                                 opts |= MONO_OPT_FCMOV;
613                         else
614                                 *exclude_mask |= MONO_OPT_FCMOV;
615                 } else
616                         *exclude_mask |= MONO_OPT_CMOV;
617                 if (edx & (1 << 26))
618                         opts |= MONO_OPT_SSE2;
619                 else
620                         *exclude_mask |= MONO_OPT_SSE2;
621         }
622         return opts;
623 }
624
625 /*
626  * Determine whenever the trap whose info is in SIGINFO is caused by
627  * integer overflow.
628  */
629 gboolean
630 mono_arch_is_int_overflow (void *sigctx, void *info)
631 {
632         MonoContext ctx;
633         guint8* ip;
634
635         mono_arch_sigctx_to_monoctx (sigctx, &ctx);
636
637         ip = (guint8*)ctx.eip;
638
639         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
640                 gint32 reg;
641
642                 /* idiv REG */
643                 switch (x86_modrm_rm (ip [1])) {
644                 case X86_EAX:
645                         reg = ctx.eax;
646                         break;
647                 case X86_ECX:
648                         reg = ctx.ecx;
649                         break;
650                 case X86_EDX:
651                         reg = ctx.edx;
652                         break;
653                 case X86_EBX:
654                         reg = ctx.ebx;
655                         break;
656                 case X86_ESI:
657                         reg = ctx.esi;
658                         break;
659                 case X86_EDI:
660                         reg = ctx.edi;
661                         break;
662                 default:
663                         g_assert_not_reached ();
664                         reg = -1;
665                 }
666
667                 if (reg == -1)
668                         return TRUE;
669         }
670                         
671         return FALSE;
672 }
673
674 GList *
675 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
676 {
677         GList *vars = NULL;
678         int i;
679
680         for (i = 0; i < cfg->num_varinfo; i++) {
681                 MonoInst *ins = cfg->varinfo [i];
682                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
683
684                 /* unused vars */
685                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
686                         continue;
687
688                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
689                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
690                         continue;
691
692                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
693                  * 8bit quantities in caller saved registers on x86 */
694                 if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
695                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
696                         g_assert (i == vmv->idx);
697                         vars = g_list_prepend (vars, vmv);
698                 }
699         }
700
701         vars = mono_varlist_sort (cfg, vars, 0);
702
703         return vars;
704 }
705
706 GList *
707 mono_arch_get_global_int_regs (MonoCompile *cfg)
708 {
709         GList *regs = NULL;
710
711         /* we can use 3 registers for global allocation */
712         regs = g_list_prepend (regs, (gpointer)X86_EBX);
713         regs = g_list_prepend (regs, (gpointer)X86_ESI);
714         regs = g_list_prepend (regs, (gpointer)X86_EDI);
715
716         return regs;
717 }
718
719 /*
720  * mono_arch_regalloc_cost:
721  *
722  *  Return the cost, in number of memory references, of the action of 
723  * allocating the variable VMV into a register during global register
724  * allocation.
725  */
726 guint32
727 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
728 {
729         MonoInst *ins = cfg->varinfo [vmv->idx];
730
731         if (cfg->method->save_lmf)
732                 /* The register is already saved */
733                 return (ins->opcode == OP_ARG) ? 1 : 0;
734         else
735                 /* push+pop+possible load if it is an argument */
736                 return (ins->opcode == OP_ARG) ? 3 : 2;
737 }
738  
739 /*
740  * Set var information according to the calling convention. X86 version.
741  * The locals var stuff should most likely be split in another method.
742  */
743 void
744 mono_arch_allocate_vars (MonoCompile *cfg)
745 {
746         MonoMethodSignature *sig;
747         MonoMethodHeader *header;
748         MonoInst *inst;
749         guint32 locals_stack_size, locals_stack_align;
750         int i, offset;
751         gint32 *offsets;
752         CallInfo *cinfo;
753
754         header = mono_method_get_header (cfg->method);
755         sig = mono_method_signature (cfg->method);
756
757         cinfo = get_call_info (cfg->mempool, sig, FALSE);
758
759         cfg->frame_reg = MONO_ARCH_BASEREG;
760         offset = 0;
761
762         /* Reserve space to save LMF and caller saved registers */
763
764         if (cfg->method->save_lmf) {
765                 offset += sizeof (MonoLMF);
766         } else {
767                 if (cfg->used_int_regs & (1 << X86_EBX)) {
768                         offset += 4;
769                 }
770
771                 if (cfg->used_int_regs & (1 << X86_EDI)) {
772                         offset += 4;
773                 }
774
775                 if (cfg->used_int_regs & (1 << X86_ESI)) {
776                         offset += 4;
777                 }
778         }
779
780         switch (cinfo->ret.storage) {
781         case ArgValuetypeInReg:
782                 /* Allocate a local to hold the result, the epilog will copy it to the correct place */
783                 offset += 8;
784                 cfg->ret->opcode = OP_REGOFFSET;
785                 cfg->ret->inst_basereg = X86_EBP;
786                 cfg->ret->inst_offset = - offset;
787                 break;
788         default:
789                 break;
790         }
791
792         /* Allocate locals */
793         offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
794         if (locals_stack_align) {
795                 offset += (locals_stack_align - 1);
796                 offset &= ~(locals_stack_align - 1);
797         }
798         for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
799                 if (offsets [i] != -1) {
800                         MonoInst *inst = cfg->varinfo [i];
801                         inst->opcode = OP_REGOFFSET;
802                         inst->inst_basereg = X86_EBP;
803                         inst->inst_offset = - (offset + offsets [i]);
804                         //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
805                 }
806         }
807         offset += locals_stack_size;
808
809
810         /*
811          * Allocate arguments+return value
812          */
813
814         switch (cinfo->ret.storage) {
815         case ArgOnStack:
816                 cfg->ret->opcode = OP_REGOFFSET;
817                 cfg->ret->inst_basereg = X86_EBP;
818                 cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
819                 break;
820         case ArgValuetypeInReg:
821                 break;
822         case ArgInIReg:
823                 cfg->ret->opcode = OP_REGVAR;
824                 cfg->ret->inst_c0 = cinfo->ret.reg;
825                 break;
826         case ArgNone:
827         case ArgOnFloatFpStack:
828         case ArgOnDoubleFpStack:
829                 break;
830         default:
831                 g_assert_not_reached ();
832         }
833
834         if (sig->call_convention == MONO_CALL_VARARG) {
835                 g_assert (cinfo->sig_cookie.storage == ArgOnStack);
836                 cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
837         }
838
839         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
840                 ArgInfo *ainfo = &cinfo->args [i];
841                 inst = cfg->args [i];
842                 if (inst->opcode != OP_REGVAR) {
843                         inst->opcode = OP_REGOFFSET;
844                         inst->inst_basereg = X86_EBP;
845                 }
846                 inst->inst_offset = ainfo->offset + ARGS_OFFSET;
847         }
848
849         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
850         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
851
852         cfg->stack_offset = offset;
853 }
854
855 void
856 mono_arch_create_vars (MonoCompile *cfg)
857 {
858         MonoMethodSignature *sig;
859         CallInfo *cinfo;
860
861         sig = mono_method_signature (cfg->method);
862
863         cinfo = get_call_info (cfg->mempool, sig, FALSE);
864
865         if (cinfo->ret.storage == ArgValuetypeInReg)
866                 cfg->ret_var_is_local = TRUE;
867 }
868
869 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
870  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
871  */
872
873 static void
874 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
875 {
876         MonoInst *arg;
877         MonoMethodSignature *tmp_sig;
878         MonoInst *sig_arg;
879
880         /* FIXME: Add support for signature tokens to AOT */
881         cfg->disable_aot = TRUE;
882         MONO_INST_NEW (cfg, arg, OP_OUTARG);
883
884         /*
885          * mono_ArgIterator_Setup assumes the signature cookie is 
886          * passed first and all the arguments which were before it are
887          * passed on the stack after the signature. So compensate by 
888          * passing a different signature.
889          */
890         tmp_sig = mono_metadata_signature_dup (call->signature);
891         tmp_sig->param_count -= call->signature->sentinelpos;
892         tmp_sig->sentinelpos = 0;
893         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
894
895         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
896         sig_arg->inst_p0 = tmp_sig;
897
898         arg->inst_left = sig_arg;
899         arg->type = STACK_PTR;
900         /* prepend, so they get reversed */
901         arg->next = call->out_args;
902         call->out_args = arg;
903 }
904
905 /*
906  * It is expensive to adjust esp for each individual fp argument pushed on the stack
907  * so we try to do it just once when we have multiple fp arguments in a row.
908  * We don't use this mechanism generally because for int arguments the generated code
909  * is slightly bigger and new generation cpus optimize away the dependency chains
910  * created by push instructions on the esp value.
911  * fp_arg_setup is the first argument in the execution sequence where the esp register
912  * is modified.
913  */
914 static int
915 collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
916 {
917         int fp_space = 0;
918         MonoType *t;
919
920         for (; start_arg < sig->param_count; ++start_arg) {
921                 t = mono_type_get_underlying_type (sig->params [start_arg]);
922                 if (!t->byref && t->type == MONO_TYPE_R8) {
923                         fp_space += sizeof (double);
924                         *fp_arg_setup = start_arg;
925                 } else {
926                         break;
927                 }
928         }
929         return fp_space;
930 }
931
932 /* 
933  * take the arguments and generate the arch-specific
934  * instructions to properly call the function in call.
935  * This includes pushing, moving arguments to the right register
936  * etc.
937  */
938 MonoCallInst*
939 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
940         MonoInst *arg, *in;
941         MonoMethodSignature *sig;
942         int i, n;
943         CallInfo *cinfo;
944         int sentinelpos = 0;
945         int fp_args_space = 0, fp_args_offset = 0, fp_arg_setup = -1;
946
947         sig = call->signature;
948         n = sig->param_count + sig->hasthis;
949
950         cinfo = get_call_info (cfg->mempool, sig, FALSE);
951
952         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
953                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
954
955         for (i = 0; i < n; ++i) {
956                 ArgInfo *ainfo = cinfo->args + i;
957
958                 /* Emit the signature cookie just before the implicit arguments */
959                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
960                         emit_sig_cookie (cfg, call);
961                 }
962
963                 if (is_virtual && i == 0) {
964                         /* the argument will be attached to the call instrucion */
965                         in = call->args [i];
966                 } else {
967                         MonoType *t;
968
969                         if (i >= sig->hasthis)
970                                 t = sig->params [i - sig->hasthis];
971                         else
972                                 t = &mono_defaults.int_class->byval_arg;
973                         t = mono_type_get_underlying_type (t);
974
975                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
976                         in = call->args [i];
977                         arg->cil_code = in->cil_code;
978                         arg->inst_left = in;
979                         arg->type = in->type;
980                         /* prepend, so they get reversed */
981                         arg->next = call->out_args;
982                         call->out_args = arg;
983
984                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
985                                 guint32 size, align;
986
987                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
988                                         size = sizeof (MonoTypedRef);
989                                         align = sizeof (gpointer);
990                                 }
991                                 else
992                                         if (sig->pinvoke)
993                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
994                                         else {
995                                                 int ialign;
996                                                 size = mono_type_stack_size (&in->klass->byval_arg, &ialign);
997                                                 align = ialign;
998                                         }
999                                 arg->opcode = OP_OUTARG_VT;
1000                                 arg->klass = in->klass;
1001                                 arg->backend.is_pinvoke = sig->pinvoke;
1002                                 arg->inst_imm = size; 
1003                         }
1004                         else {
1005                                 switch (ainfo->storage) {
1006                                 case ArgOnStack:
1007                                         arg->opcode = OP_OUTARG;
1008                                         if (!t->byref) {
1009                                                 if (t->type == MONO_TYPE_R4) {
1010                                                         arg->opcode = OP_OUTARG_R4;
1011                                                 } else if (t->type == MONO_TYPE_R8) {
1012                                                         arg->opcode = OP_OUTARG_R8;
1013                                                         /* we store in the upper bits of backen.arg_info the needed
1014                                                          * esp adjustment and in the lower bits the offset from esp
1015                                                          * where the arg needs to be stored
1016                                                          */
1017                                                         if (!fp_args_space) {
1018                                                                 fp_args_space = collect_fp_stack_space (sig, i - sig->hasthis, &fp_arg_setup);
1019                                                                 fp_args_offset = fp_args_space;
1020                                                         }
1021                                                         arg->backend.arg_info = fp_args_space - fp_args_offset;
1022                                                         fp_args_offset -= sizeof (double);
1023                                                         if (i - sig->hasthis == fp_arg_setup) {
1024                                                                 arg->backend.arg_info |= fp_args_space << 16;
1025                                                         }
1026                                                         if (fp_args_offset == 0) {
1027                                                                 /* the allocated esp stack is finished:
1028                                                                  * prepare for an eventual second run of fp args
1029                                                                  */
1030                                                                 fp_args_space = 0;
1031                                                         }
1032                                                 }
1033                                         }
1034                                         break;
1035                                 default:
1036                                         g_assert_not_reached ();
1037                                 }
1038                         }
1039                 }
1040         }
1041
1042         /* Handle the case where there are no implicit arguments */
1043         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
1044                 emit_sig_cookie (cfg, call);
1045         }
1046
1047         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
1048                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1049                         MonoInst *zero_inst;
1050                         /*
1051                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
1052                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1053                          * before calling the function. So we add a dummy instruction to represent pushing the 
1054                          * struct return address to the stack. The return address will be saved to this stack slot 
1055                          * by the code emitted in this_vret_args.
1056                          */
1057                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1058                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1059                         zero_inst->inst_p0 = 0;
1060                         arg->inst_left = zero_inst;
1061                         arg->type = STACK_PTR;
1062                         /* prepend, so they get reversed */
1063                         arg->next = call->out_args;
1064                         call->out_args = arg;
1065                 }
1066                 else
1067                         /* if the function returns a struct, the called method already does a ret $0x4 */
1068                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1069                                 cinfo->stack_usage -= 4;
1070         }
1071         
1072         call->stack_usage = cinfo->stack_usage;
1073
1074 #if defined(__APPLE__)
1075         if (cinfo->need_stack_align) {
1076                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1077                 arg->inst_c0 = cinfo->stack_align_amount;
1078                 arg->next = call->out_args;
1079                 call->out_args = arg;
1080         }
1081 #endif 
1082
1083         return call;
1084 }
1085
1086 /*
1087  * Allow tracing to work with this interface (with an optional argument)
1088  */
1089 void*
1090 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1091 {
1092         guchar *code = p;
1093
1094 #if __APPLE__
1095         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1096 #endif
1097
1098         /* if some args are passed in registers, we need to save them here */
1099         x86_push_reg (code, X86_EBP);
1100
1101         if (cfg->compile_aot) {
1102                 x86_push_imm (code, cfg->method);
1103                 x86_mov_reg_imm (code, X86_EAX, func);
1104                 x86_call_reg (code, X86_EAX);
1105         } else {
1106                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1107                 x86_push_imm (code, cfg->method);
1108                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1109                 x86_call_code (code, 0);
1110         }
1111 #if __APPLE__
1112         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 16);
1113 #else
1114         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1115 #endif
1116
1117         return code;
1118 }
1119
/*
 * How mono_arch_instrument_epilog () has to preserve the return value of the
 * instrumented method around the call to the tracing function.
 */
enum {
        SAVE_NONE    = 0,       /* nothing is saved */
        SAVE_STRUCT  = 1,       /* nothing is saved, only the word at 8(%ebp) may be passed on */
        SAVE_EAX     = 2,       /* EAX is saved and restored */
        SAVE_EAX_EDX = 3,       /* the EDX:EAX pair is saved and restored */
        SAVE_FP      = 4        /* the top of the fp stack is saved and restored */
};
1127
1128 void*
1129 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1130 {
1131         guchar *code = p;
1132         int arg_size = 0, save_mode = SAVE_NONE;
1133         MonoMethod *method = cfg->method;
1134         
1135         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1136         case MONO_TYPE_VOID:
1137                 /* special case string .ctor icall */
1138                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1139                         save_mode = SAVE_EAX;
1140                 else
1141                         save_mode = SAVE_NONE;
1142                 break;
1143         case MONO_TYPE_I8:
1144         case MONO_TYPE_U8:
1145                 save_mode = SAVE_EAX_EDX;
1146                 break;
1147         case MONO_TYPE_R4:
1148         case MONO_TYPE_R8:
1149                 save_mode = SAVE_FP;
1150                 break;
1151         case MONO_TYPE_GENERICINST:
1152                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1153                         save_mode = SAVE_EAX;
1154                         break;
1155                 }
1156                 /* Fall through */
1157         case MONO_TYPE_VALUETYPE:
1158                 save_mode = SAVE_STRUCT;
1159                 break;
1160         default:
1161                 save_mode = SAVE_EAX;
1162                 break;
1163         }
1164
1165         switch (save_mode) {
1166         case SAVE_EAX_EDX:
1167                 x86_push_reg (code, X86_EDX);
1168                 x86_push_reg (code, X86_EAX);
1169                 if (enable_arguments) {
1170                         x86_push_reg (code, X86_EDX);
1171                         x86_push_reg (code, X86_EAX);
1172                         arg_size = 8;
1173                 }
1174                 break;
1175         case SAVE_EAX:
1176                 x86_push_reg (code, X86_EAX);
1177                 if (enable_arguments) {
1178                         x86_push_reg (code, X86_EAX);
1179                         arg_size = 4;
1180                 }
1181                 break;
1182         case SAVE_FP:
1183                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1184                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1185                 if (enable_arguments) {
1186                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1187                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1188                         arg_size = 8;
1189                 }
1190                 break;
1191         case SAVE_STRUCT:
1192                 if (enable_arguments) {
1193                         x86_push_membase (code, X86_EBP, 8);
1194                         arg_size = 4;
1195                 }
1196                 break;
1197         case SAVE_NONE:
1198         default:
1199                 break;
1200         }
1201
1202         if (cfg->compile_aot) {
1203                 x86_push_imm (code, method);
1204                 x86_mov_reg_imm (code, X86_EAX, func);
1205                 x86_call_reg (code, X86_EAX);
1206         } else {
1207                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1208                 x86_push_imm (code, method);
1209                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1210                 x86_call_code (code, 0);
1211         }
1212         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1213
1214         switch (save_mode) {
1215         case SAVE_EAX_EDX:
1216                 x86_pop_reg (code, X86_EAX);
1217                 x86_pop_reg (code, X86_EDX);
1218                 break;
1219         case SAVE_EAX:
1220                 x86_pop_reg (code, X86_EAX);
1221                 break;
1222         case SAVE_FP:
1223                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1224                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1225                 break;
1226         case SAVE_NONE:
1227         default:
1228                 break;
1229         }
1230
1231         return code;
1232 }
1233
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch (condition COND, signedness SIGN) to the target
 * of INS. Expands to a bare if/else statement and relies on the variables
 * 'cfg', 'code' and 'cpos' being in scope at the point of use.
 *   If INS has MONO_INST_BRLABEL set the target is the label ins->inst_i0,
 * otherwise it is the basic block ins->inst_true_bb. When the target already
 * has a non-zero native offset (inst_c0 resp. native_offset) the branch is
 * emitted directly to it. Otherwise a patch info entry is recorded and a
 * branch with a zero placeholder is emitted: the short 8 bit form when
 * MONO_OPT_BRANCH is enabled and the offset estimate (inst_c1 resp.
 * max_offset, relative to cpos) fits in an imm8, the 32 bit form otherwise.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1258
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 *
 *   Emit a conditional branch (condition COND, signedness SIGNED) which is
 * taken to raise the system exception named EXC_NAME.
 * If mono_branch_optimize_exception_target () returns an instruction for the
 * current basic block, branch directly to it using EMIT_COND_BRANCH.
 * Otherwise record a MONO_PATCH_INFO_EXC patch and emit a 32 bit branch with
 * a zero placeholder.
 * Relies on 'cfg', 'bb' and 'code' (plus whatever EMIT_COND_BRANCH needs)
 * being in scope at the point of use.
 * NOTE: the definition already ends with a semicolon, so 'MACRO (...);'
 * expands to two statements; do not use it as the unbraced body of an if
 * which has an else branch.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
        do {                                                        \
                MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
                if (tins == NULL) {                                                                             \
                        mono_add_patch_info (cfg, code - cfg->native_code,   \
                                        MONO_PATCH_INFO_EXC, exc_name);  \
                        x86_branch32 (code, cond, 0, signed);               \
                } else {        \
                        EMIT_COND_BRANCH (tins, cond, signed);  \
                }                       \
        } while (0); 
1274
/*
 * EMIT_FPCOMPARE:
 *
 *   Emit an fcompp followed by an fnstsw at CODE, using the emitter macros of
 * those names. Presumably this compares and pops the two topmost fp stack
 * entries and makes the resulting status word available for testing; confirm
 * against x86-codegen.h.
 * NOTE: the definition already ends with a semicolon, see the caveat about
 * unbraced if/else bodies which applies to such macros.
 */
#define EMIT_FPCOMPARE(code) do { \
        x86_fcompp (code); \
        x86_fnstsw (code); \
} while (0); 
1279
1280
1281 static guint8*
1282 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1283 {
1284         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1285         x86_call_code (code, 0);
1286
1287         return code;
1288 }
1289
/*
 * INST_IGNORES_CFLAGS:
 *
 *   Evaluates to true unless OPCODE is one of the add-with-carry or
 * subtract-with-borrow opcodes (ADC/SBB in their plain, I- and _IMM variants).
 * OPCODE is evaluated several times, so it must be free of side effects.
 */
#define INST_IGNORES_CFLAGS(opcode) \
        (((opcode) != OP_ADC) && ((opcode) != OP_IADC) && \
         ((opcode) != OP_ADC_IMM) && ((opcode) != OP_IADC_IMM) && \
         ((opcode) != OP_SBB) && ((opcode) != OP_ISBB) && \
         ((opcode) != OP_SBB_IMM) && ((opcode) != OP_ISBB_IMM))
1291
1292 /*
1293  * peephole_pass_1:
1294  *
1295  *   Perform peephole opts which should/can be performed before local regalloc
1296  */
1297 static void
1298 peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
1299 {
1300         MonoInst *ins, *last_ins = NULL;
1301         ins = bb->code;
1302
1303         while (ins) {
1304                 switch (ins->opcode) {
1305                 case OP_IADD_IMM:
1306                 case OP_ADD_IMM:
1307                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1308                                 /* 
1309                                  * X86_LEA is like ADD, but doesn't have the
1310                                  * sreg1==dreg restriction.
1311                                  */
1312                                 ins->opcode = OP_X86_LEA_MEMBASE;
1313                                 ins->inst_basereg = ins->sreg1;
1314                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1315                                 ins->opcode = OP_X86_INC_REG;
1316                         break;
1317                 case OP_SUB_IMM:
1318                 case OP_ISUB_IMM:
1319                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1320                                 ins->opcode = OP_X86_LEA_MEMBASE;
1321                                 ins->inst_basereg = ins->sreg1;
1322                                 ins->inst_imm = -ins->inst_imm;
1323                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1324                                 ins->opcode = OP_X86_DEC_REG;
1325                         break;
1326                 case OP_COMPARE_IMM:
1327                 case OP_ICOMPARE_IMM:
1328                         /* OP_COMPARE_IMM (reg, 0) 
1329                          * --> 
1330                          * OP_X86_TEST_NULL (reg) 
1331                          */
1332                         if (!ins->inst_imm)
1333                                 ins->opcode = OP_X86_TEST_NULL;
1334                         break;
1335                 case OP_X86_COMPARE_MEMBASE_IMM:
1336                         /* 
1337                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1338                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1339                          * -->
1340                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1341                          * OP_COMPARE_IMM reg, imm
1342                          *
1343                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1344                          */
1345                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1346                             ins->inst_basereg == last_ins->inst_destbasereg &&
1347                             ins->inst_offset == last_ins->inst_offset) {
1348                                         ins->opcode = OP_COMPARE_IMM;
1349                                         ins->sreg1 = last_ins->sreg1;
1350
1351                                         /* check if we can remove cmp reg,0 with test null */
1352                                         if (!ins->inst_imm)
1353                                                 ins->opcode = OP_X86_TEST_NULL;
1354                                 }
1355
1356                         break;
1357                 case OP_LOAD_MEMBASE:
1358                 case OP_LOADI4_MEMBASE:
1359                         /* 
1360                          * Note: if reg1 = reg2 the load op is removed
1361                          *
1362                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1363                          * OP_LOAD_MEMBASE offset(basereg), reg2
1364                          * -->
1365                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1366                          * OP_MOVE reg1, reg2
1367                          */
1368                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1369                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1370                             ins->inst_basereg == last_ins->inst_destbasereg &&
1371                             ins->inst_offset == last_ins->inst_offset) {
1372                                 if (ins->dreg == last_ins->sreg1) {
1373                                         last_ins->next = ins->next;                             
1374                                         ins = ins->next;                                
1375                                         continue;
1376                                 } else {
1377                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1378                                         ins->opcode = OP_MOVE;
1379                                         ins->sreg1 = last_ins->sreg1;
1380                                 }
1381
1382                         /* 
1383                          * Note: reg1 must be different from the basereg in the second load
1384                          * Note: if reg1 = reg2 is equal then second load is removed
1385                          *
1386                          * OP_LOAD_MEMBASE offset(basereg), reg1
1387                          * OP_LOAD_MEMBASE offset(basereg), reg2
1388                          * -->
1389                          * OP_LOAD_MEMBASE offset(basereg), reg1
1390                          * OP_MOVE reg1, reg2
1391                          */
1392                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1393                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1394                               ins->inst_basereg != last_ins->dreg &&
1395                               ins->inst_basereg == last_ins->inst_basereg &&
1396                               ins->inst_offset == last_ins->inst_offset) {
1397
1398                                 if (ins->dreg == last_ins->dreg) {
1399                                         last_ins->next = ins->next;                             
1400                                         ins = ins->next;                                
1401                                         continue;
1402                                 } else {
1403                                         ins->opcode = OP_MOVE;
1404                                         ins->sreg1 = last_ins->dreg;
1405                                 }
1406
1407                                 //g_assert_not_reached ();
1408
1409 #if 0
1410                         /* 
1411                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1412                          * OP_LOAD_MEMBASE offset(basereg), reg
1413                          * -->
1414                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1415                          * OP_ICONST reg, imm
1416                          */
1417                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1418                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1419                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1420                                    ins->inst_offset == last_ins->inst_offset) {
1421                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1422                                 ins->opcode = OP_ICONST;
1423                                 ins->inst_c0 = last_ins->inst_imm;
1424                                 g_assert_not_reached (); // check this rule
1425 #endif
1426                         }
1427                         break;
1428                 case OP_LOADU1_MEMBASE:
1429                 case OP_LOADI1_MEMBASE:
1430                         /* 
1431                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1432                          * OP_LOAD_MEMBASE offset(basereg), reg2
1433                          * -->
1434                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1435                          * CONV_I2/U2 reg1, reg2
1436                          */
1437                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1438                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1439                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1440                                         ins->inst_offset == last_ins->inst_offset) {
1441                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1442                                 ins->sreg1 = last_ins->sreg1;
1443                         }
1444                         break;
1445                 case OP_LOADU2_MEMBASE:
1446                 case OP_LOADI2_MEMBASE:
1447                         /* 
1448                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1449                          * OP_LOAD_MEMBASE offset(basereg), reg2
1450                          * -->
1451                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1452                          * CONV_I2/U2 reg1, reg2
1453                          */
1454                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1455                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1456                                         ins->inst_offset == last_ins->inst_offset) {
1457                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1458                                 ins->sreg1 = last_ins->sreg1;
1459                         }
1460                         break;
1461                 case CEE_CONV_I4:
1462                 case CEE_CONV_U4:
1463                 case OP_ICONV_TO_I4:
1464                 case OP_MOVE:
1465                         /*
1466                          * Removes:
1467                          *
1468                          * OP_MOVE reg, reg 
1469                          */
1470                         if (ins->dreg == ins->sreg1) {
1471                                 if (last_ins)
1472                                         last_ins->next = ins->next;                             
1473                                 ins = ins->next;
1474                                 continue;
1475                         }
1476                         /* 
1477                          * Removes:
1478                          *
1479                          * OP_MOVE sreg, dreg 
1480                          * OP_MOVE dreg, sreg
1481                          */
1482                         if (last_ins && last_ins->opcode == OP_MOVE &&
1483                             ins->sreg1 == last_ins->dreg &&
1484                             ins->dreg == last_ins->sreg1) {
1485                                 last_ins->next = ins->next;                             
1486                                 ins = ins->next;                                
1487                                 continue;
1488                         }
1489                         break;
1490                         
1491                 case OP_X86_PUSH_MEMBASE:
1492                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1493                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1494                             ins->inst_basereg == last_ins->inst_destbasereg &&
1495                             ins->inst_offset == last_ins->inst_offset) {
1496                                     ins->opcode = OP_X86_PUSH;
1497                                     ins->sreg1 = last_ins->sreg1;
1498                         }
1499                         break;
1500                 }
1501                 last_ins = ins;
1502                 ins = ins->next;
1503         }
1504         bb->last_ins = last_ins;
1505 }
1506
1507 static void
1508 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1509 {
1510         MonoInst *ins, *last_ins = NULL;
1511         ins = bb->code;
1512
1513         while (ins) {
1514
1515                 switch (ins->opcode) {
1516                 case OP_ICONST:
1517                         /* reg = 0 -> XOR (reg, reg) */
1518                         /* XOR sets cflags on x86, so we cant do it always */
1519                         if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
1520                                 MonoInst *ins2;
1521
1522                                 ins->opcode = OP_IXOR;
1523                                 ins->sreg1 = ins->dreg;
1524                                 ins->sreg2 = ins->dreg;
1525
1526                                 /* 
1527                                  * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG 
1528                                  * since it takes 3 bytes instead of 7.
1529                                  */
1530                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
1531                                         if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1532                                                 ins2->opcode = OP_STORE_MEMBASE_REG;
1533                                                 ins2->sreg1 = ins->dreg;
1534                                         }
1535                                         else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1536                                                 ins2->opcode = OP_STOREI4_MEMBASE_REG;
1537                                                 ins2->sreg1 = ins->dreg;
1538                                         }
1539                                         else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
1540                                                 /* Continue iteration */
1541                                         }
1542                                         else
1543                                                 break;
1544                                 }
1545                         }
1546                         break;
1547                 case OP_IADD_IMM:
1548                 case OP_ADD_IMM:
1549                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1550                                 ins->opcode = OP_X86_INC_REG;
1551                         break;
1552                 case OP_ISUB_IMM:
1553                 case OP_SUB_IMM:
1554                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1555                                 ins->opcode = OP_X86_DEC_REG;
1556                         break;
1557                 case OP_X86_COMPARE_MEMBASE_IMM:
1558                         /* 
1559                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1560                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1561                          * -->
1562                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1563                          * OP_COMPARE_IMM reg, imm
1564                          *
1565                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1566                          */
1567                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1568                             ins->inst_basereg == last_ins->inst_destbasereg &&
1569                             ins->inst_offset == last_ins->inst_offset) {
1570                                         ins->opcode = OP_COMPARE_IMM;
1571                                         ins->sreg1 = last_ins->sreg1;
1572
1573                                         /* check if we can remove cmp reg,0 with test null */
1574                                         if (!ins->inst_imm)
1575                                                 ins->opcode = OP_X86_TEST_NULL;
1576                                 }
1577
1578                         break;
1579                 case OP_LOAD_MEMBASE:
1580                 case OP_LOADI4_MEMBASE:
1581                         /* 
1582                          * Note: if reg1 = reg2 the load op is removed
1583                          *
1584                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1585                          * OP_LOAD_MEMBASE offset(basereg), reg2
1586                          * -->
1587                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1588                          * OP_MOVE reg1, reg2
1589                          */
1590                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1591                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1592                             ins->inst_basereg == last_ins->inst_destbasereg &&
1593                             ins->inst_offset == last_ins->inst_offset) {
1594                                 if (ins->dreg == last_ins->sreg1) {
1595                                         last_ins->next = ins->next;                             
1596                                         ins = ins->next;                                
1597                                         continue;
1598                                 } else {
1599                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1600                                         ins->opcode = OP_MOVE;
1601                                         ins->sreg1 = last_ins->sreg1;
1602                                 }
1603
1604                         /* 
1605                          * Note: reg1 must be different from the basereg in the second load
1606                          * Note: if reg1 = reg2 is equal then second load is removed
1607                          *
1608                          * OP_LOAD_MEMBASE offset(basereg), reg1
1609                          * OP_LOAD_MEMBASE offset(basereg), reg2
1610                          * -->
1611                          * OP_LOAD_MEMBASE offset(basereg), reg1
1612                          * OP_MOVE reg1, reg2
1613                          */
1614                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1615                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1616                               ins->inst_basereg != last_ins->dreg &&
1617                               ins->inst_basereg == last_ins->inst_basereg &&
1618                               ins->inst_offset == last_ins->inst_offset) {
1619
1620                                 if (ins->dreg == last_ins->dreg) {
1621                                         last_ins->next = ins->next;                             
1622                                         ins = ins->next;                                
1623                                         continue;
1624                                 } else {
1625                                         ins->opcode = OP_MOVE;
1626                                         ins->sreg1 = last_ins->dreg;
1627                                 }
1628
1629                                 //g_assert_not_reached ();
1630
1631 #if 0
1632                         /* 
1633                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1634                          * OP_LOAD_MEMBASE offset(basereg), reg
1635                          * -->
1636                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1637                          * OP_ICONST reg, imm
1638                          */
1639                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1640                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1641                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1642                                    ins->inst_offset == last_ins->inst_offset) {
1643                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1644                                 ins->opcode = OP_ICONST;
1645                                 ins->inst_c0 = last_ins->inst_imm;
1646                                 g_assert_not_reached (); // check this rule
1647 #endif
1648                         }
1649                         break;
1650                 case OP_LOADU1_MEMBASE:
1651                 case OP_LOADI1_MEMBASE:
1652                         /* 
1653                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1654                          * OP_LOAD_MEMBASE offset(basereg), reg2
1655                          * -->
1656                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1657                          * CONV_I2/U2 reg1, reg2
1658                          */
1659                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1660                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1661                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1662                                         ins->inst_offset == last_ins->inst_offset) {
1663                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1664                                 ins->sreg1 = last_ins->sreg1;
1665                         }
1666                         break;
1667                 case OP_LOADU2_MEMBASE:
1668                 case OP_LOADI2_MEMBASE:
1669                         /* 
1670                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1671                          * OP_LOAD_MEMBASE offset(basereg), reg2
1672                          * -->
1673                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1674                          * CONV_I2/U2 reg1, reg2
1675                          */
1676                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1677                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1678                                         ins->inst_offset == last_ins->inst_offset) {
1679                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1680                                 ins->sreg1 = last_ins->sreg1;
1681                         }
1682                         break;
1683                 case CEE_CONV_I4:
1684                 case CEE_CONV_U4:
1685                 case OP_ICONV_TO_I4:
1686                 case OP_MOVE:
1687                         /*
1688                          * Removes:
1689                          *
1690                          * OP_MOVE reg, reg 
1691                          */
1692                         if (ins->dreg == ins->sreg1) {
1693                                 if (last_ins)
1694                                         last_ins->next = ins->next;                             
1695                                 ins = ins->next;
1696                                 continue;
1697                         }
1698                         /* 
1699                          * Removes:
1700                          *
1701                          * OP_MOVE sreg, dreg 
1702                          * OP_MOVE dreg, sreg
1703                          */
1704                         if (last_ins && last_ins->opcode == OP_MOVE &&
1705                             ins->sreg1 == last_ins->dreg &&
1706                             ins->dreg == last_ins->sreg1) {
1707                                 last_ins->next = ins->next;                             
1708                                 ins = ins->next;                                
1709                                 continue;
1710                         }
1711                         break;
1712                 case OP_X86_PUSH_MEMBASE:
1713                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1714                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1715                             ins->inst_basereg == last_ins->inst_destbasereg &&
1716                             ins->inst_offset == last_ins->inst_offset) {
1717                                     ins->opcode = OP_X86_PUSH;
1718                                     ins->sreg1 = last_ins->sreg1;
1719                         }
1720                         break;
1721                 }
1722                 last_ins = ins;
1723                 ins = ins->next;
1724         }
1725         bb->last_ins = last_ins;
1726 }
1727
1728 static const int 
1729 branch_cc_table [] = {
1730         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1731         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1732         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1733 };
1734
1735 /* Maps CMP_... constants to X86_CC_... constants */
1736 static const int
1737 cc_table [] = {
1738         X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
1739         X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
1740 };
1741
1742 static const int
1743 cc_signed_table [] = {
1744         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1745         FALSE, FALSE, FALSE, FALSE
1746 };
1747
1748 void
1749 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1750 {
1751         if (cfg->opt & MONO_OPT_PEEPHOLE)
1752                 peephole_pass_1 (cfg, bb);
1753
1754         mono_local_regalloc (cfg, bb);
1755 }
1756
1757 static unsigned char*
1758 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
1759 {
1760 #define XMM_TEMP_REG 0
1761         if (cfg->opt & MONO_OPT_SSE2 && size < 8) {
1762                 /* optimize by assigning a local var for this use so we avoid
1763                  * the stack manipulations */
1764                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1765                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1766                 x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
1767                 x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
1768                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1769                 if (size == 1)
1770                         x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1771                 else if (size == 2)
1772                         x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1773                 return code;
1774         }
1775         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
1776         x86_fnstcw_membase(code, X86_ESP, 0);
1777         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
1778         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
1779         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
1780         x86_fldcw_membase (code, X86_ESP, 2);
1781         if (size == 8) {
1782                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1783                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
1784                 x86_pop_reg (code, dreg);
1785                 /* FIXME: need the high register 
1786                  * x86_pop_reg (code, dreg_high);
1787                  */
1788         } else {
1789                 x86_push_reg (code, X86_EAX); // SP = SP - 4
1790                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
1791                 x86_pop_reg (code, dreg);
1792         }
1793         x86_fldcw_membase (code, X86_ESP, 0);
1794         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
1795
1796         if (size == 1)
1797                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1798         else if (size == 2)
1799                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1800         return code;
1801 }
1802
1803 static unsigned char*
1804 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1805 {
1806         int sreg = tree->sreg1;
1807         int need_touch = FALSE;
1808
1809 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
1810         need_touch = TRUE;
1811 #endif
1812
1813         if (need_touch) {
1814                 guint8* br[5];
1815
1816                 /*
1817                  * Under Windows:
1818                  * If requested stack size is larger than one page,
1819                  * perform stack-touch operation
1820                  */
1821                 /*
1822                  * Generate stack probe code.
1823                  * Under Windows, it is necessary to allocate one page at a time,
1824                  * "touching" stack after each successful sub-allocation. This is
1825                  * because of the way stack growth is implemented - there is a
1826                  * guard page before the lowest stack page that is currently commited.
1827                  * Stack normally grows sequentially so OS traps access to the
1828                  * guard page and commits more pages when needed.
1829                  */
1830                 x86_test_reg_imm (code, sreg, ~0xFFF);
1831                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1832
1833                 br[2] = code; /* loop */
1834                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
1835                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
1836
1837                 /* 
1838                  * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
1839                  * that follows only initializes the last part of the area.
1840                  */
1841                 /* Same as the init code below with size==0x1000 */
1842                 if (tree->flags & MONO_INST_INIT) {
1843                         x86_push_reg (code, X86_EAX);
1844                         x86_push_reg (code, X86_ECX);
1845                         x86_push_reg (code, X86_EDI);
1846                         x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
1847                         x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
1848                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
1849                         x86_cld (code);
1850                         x86_prefix (code, X86_REP_PREFIX);
1851                         x86_stosl (code);
1852                         x86_pop_reg (code, X86_EDI);
1853                         x86_pop_reg (code, X86_ECX);
1854                         x86_pop_reg (code, X86_EAX);
1855                 }
1856
1857                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1858                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1859                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1860                 x86_patch (br[3], br[2]);
1861                 x86_test_reg_reg (code, sreg, sreg);
1862                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1863                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1864
1865                 br[1] = code; x86_jump8 (code, 0);
1866
1867                 x86_patch (br[0], code);
1868                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1869                 x86_patch (br[1], code);
1870                 x86_patch (br[4], code);
1871         }
1872         else
1873                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1874
1875         if (tree->flags & MONO_INST_INIT) {
1876                 int offset = 0;
1877                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
1878                         x86_push_reg (code, X86_EAX);
1879                         offset += 4;
1880                 }
1881                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1882                         x86_push_reg (code, X86_ECX);
1883                         offset += 4;
1884                 }
1885                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1886                         x86_push_reg (code, X86_EDI);
1887                         offset += 4;
1888                 }
1889                 
1890                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
1891                 if (sreg != X86_ECX)
1892                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
1893                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
1894                                 
1895                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
1896                 x86_cld (code);
1897                 x86_prefix (code, X86_REP_PREFIX);
1898                 x86_stosl (code);
1899                 
1900                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
1901                         x86_pop_reg (code, X86_EDI);
1902                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1903                         x86_pop_reg (code, X86_ECX);
1904                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1905                         x86_pop_reg (code, X86_EAX);
1906         }
1907         return code;
1908 }
1909
1910
1911 static guint8*
1912 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1913 {
1914         CallInfo *cinfo;
1915         int quad;
1916
1917         /* Move return value to the target register */
1918         switch (ins->opcode) {
1919         case CEE_CALL:
1920         case OP_CALL_REG:
1921         case OP_CALL_MEMBASE:
1922                 if (ins->dreg != X86_EAX)
1923                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1924                 break;
1925         case OP_VCALL:
1926         case OP_VCALL_REG:
1927         case OP_VCALL_MEMBASE:
1928                 cinfo = get_call_info (cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
1929                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1930                         /* Pop the destination address from the stack */
1931                         x86_pop_reg (code, X86_ECX);
1932                         
1933                         for (quad = 0; quad < 2; quad ++) {
1934                                 switch (cinfo->ret.pair_storage [quad]) {
1935                                 case ArgInIReg:
1936                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1937                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1938                                         break;
1939                                 case ArgNone:
1940                                         break;
1941                                 default:
1942                                         g_assert_not_reached ();
1943                                 }
1944                         }
1945                 }
1946         default:
1947                 break;
1948         }
1949
1950         return code;
1951 }
1952
1953 /*
1954  * emit_tls_get:
1955  * @code: buffer to store code to
1956  * @dreg: hard register where to place the result
1957  * @tls_offset: offset info
1958  *
1959  * emit_tls_get emits in @code the native code that puts in the dreg register
1960  * the item in the thread local storage identified by tls_offset.
1961  *
1962  * Returns: a pointer to the end of the stored code
1963  */
1964 static guint8*
1965 emit_tls_get (guint8* code, int dreg, int tls_offset)
1966 {
1967 #ifdef PLATFORM_WIN32
1968         /* 
1969          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
1970          * Journal and/or a disassembly of the TlsGet () function.
1971          */
1972         g_assert (tls_offset < 64);
1973         x86_prefix (code, X86_FS_PREFIX);
1974         x86_mov_reg_mem (code, dreg, 0x18, 4);
1975         /* Dunno what this does but TlsGetValue () contains it */
1976         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
1977         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
1978 #else
1979         if (optimize_for_xen) {
1980                 x86_prefix (code, X86_GS_PREFIX);
1981                 x86_mov_reg_mem (code, dreg, 0, 4);
1982                 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
1983         } else {
1984                 x86_prefix (code, X86_GS_PREFIX);
1985                 x86_mov_reg_mem (code, dreg, tls_offset, 4);
1986         }
1987 #endif
1988         return code;
1989 }
1990
1991 /*
1992  * emit_load_volatile_arguments:
1993  *
1994  *  Load volatile arguments from the stack to the original input registers.
1995  * Required before a tail call.
1996  */
1997 static guint8*
1998 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
1999 {
2000         MonoMethod *method = cfg->method;
2001         MonoMethodSignature *sig;
2002         MonoInst *inst;
2003         CallInfo *cinfo;
2004         guint32 i;
2005
2006         /* FIXME: Generate intermediate code instead */
2007
2008         sig = mono_method_signature (method);
2009
2010         cinfo = get_call_info (cfg->mempool, sig, FALSE);
2011         
2012         /* This is the opposite of the code in emit_prolog */
2013
2014         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
2015                 ArgInfo *ainfo = cinfo->args + i;
2016                 MonoType *arg_type;
2017                 inst = cfg->args [i];
2018
2019                 if (sig->hasthis && (i == 0))
2020                         arg_type = &mono_defaults.object_class->byval_arg;
2021                 else
2022                         arg_type = sig->params [i - sig->hasthis];
2023
2024                 /*
2025                  * On x86, the arguments are either in their original stack locations, or in
2026                  * global regs.
2027                  */
2028                 if (inst->opcode == OP_REGVAR) {
2029                         g_assert (ainfo->storage == ArgOnStack);
2030                         
2031                         x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
2032                 }
2033         }
2034
2035         return code;
2036 }
2037
/*
 * REAL_PRINT_REG:
 * @text: string literal placed in front of the " %d %p\n" format
 * @reg: hard register to dump
 *
 * Debugging aid: emits native code that calls printf with the number of
 * @reg and the value it holds at run time.  eax, edx and ecx are saved
 * around the call.  Needs a code pointer named "code" in scope and expands
 * to several statements, so it must not be used as an unbraced if/else body.
 */
#define REAL_PRINT_REG(text,reg) \
        mono_assert ((reg) >= 0); \
        x86_push_reg (code, X86_EAX); \
        x86_push_reg (code, X86_EDX); \
        x86_push_reg (code, X86_ECX); \
        x86_push_reg (code, (reg)); \
        x86_push_imm (code, (reg)); \
        x86_push_imm (code, text " %d %p\n"); \
        x86_mov_reg_imm (code, X86_EAX, printf); \
        x86_call_reg (code, X86_EAX); \
        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
        x86_pop_reg (code, X86_ECX); \
        x86_pop_reg (code, X86_EDX); \
        x86_pop_reg (code, X86_EAX);
2052
2053 /* Byte alignment used to pad the start of loop blocks. TODO: benchmark and set based on cpu */
2054 #define LOOP_ALIGNMENT 8
2055 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting) /* non-zero when both fields of bb are set */
2056
2057 void
2058 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2059 {
2060         MonoInst *ins;
2061         MonoCallInst *call;
2062         guint offset;
2063         guint8 *code = cfg->native_code + cfg->code_len;
2064         MonoInst *last_ins = NULL;
2065         guint last_offset = 0;
2066         int max_len, cpos;
2067
2068         if (cfg->opt & MONO_OPT_PEEPHOLE)
2069                 peephole_pass (cfg, bb);
2070
2071         if (cfg->opt & MONO_OPT_LOOP) {
2072                 int pad, align = LOOP_ALIGNMENT;
2073                 /* set alignment depending on cpu */
2074                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2075                         pad = align - pad;
2076                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2077                         x86_padding (code, pad);
2078                         cfg->code_len += pad;
2079                         bb->native_offset = cfg->code_len;
2080                 }
2081         }
2082
2083         if (cfg->verbose_level > 2)
2084                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2085
2086         cpos = bb->max_offset;
2087
2088         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2089                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2090                 g_assert (!cfg->compile_aot);
2091                 cpos += 6;
2092
2093                 cov->data [bb->dfn].cil_code = bb->cil_code;
2094                 /* this is not thread save, but good enough */
2095                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2096         }
2097
2098         offset = code - cfg->native_code;
2099
2100         mono_debug_open_block (cfg, bb, offset);
2101
2102         ins = bb->code;
2103         while (ins) {
2104                 offset = code - cfg->native_code;
2105
2106                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2107
2108                 if (offset > (cfg->code_size - max_len - 16)) {
2109                         cfg->code_size *= 2;
2110                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2111                         code = cfg->native_code + offset;
2112                         mono_jit_stats.code_reallocs++;
2113                 }
2114
2115                 mono_debug_record_line_number (cfg, ins, offset);
2116
2117                 switch (ins->opcode) {
2118                 case OP_BIGMUL:
2119                         x86_mul_reg (code, ins->sreg2, TRUE);
2120                         break;
2121                 case OP_BIGMUL_UN:
2122                         x86_mul_reg (code, ins->sreg2, FALSE);
2123                         break;
2124                 case OP_X86_SETEQ_MEMBASE:
2125                 case OP_X86_SETNE_MEMBASE:
2126                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2127                                          ins->inst_basereg, ins->inst_offset, TRUE);
2128                         break;
2129                 case OP_STOREI1_MEMBASE_IMM:
2130                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2131                         break;
2132                 case OP_STOREI2_MEMBASE_IMM:
2133                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2134                         break;
2135                 case OP_STORE_MEMBASE_IMM:
2136                 case OP_STOREI4_MEMBASE_IMM:
2137                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2138                         break;
2139                 case OP_STOREI1_MEMBASE_REG:
2140                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2141                         break;
2142                 case OP_STOREI2_MEMBASE_REG:
2143                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2144                         break;
2145                 case OP_STORE_MEMBASE_REG:
2146                 case OP_STOREI4_MEMBASE_REG:
2147                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2148                         break;
2149                 case CEE_LDIND_I:
2150                 case CEE_LDIND_I4:
2151                 case CEE_LDIND_U4:
2152                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2153                         break;
2154                 case OP_LOADU4_MEM:
2155                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2156                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2157                         break;
2158                 case OP_LOAD_MEMBASE:
2159                 case OP_LOADI4_MEMBASE:
2160                 case OP_LOADU4_MEMBASE:
2161                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2162                         break;
2163                 case OP_LOADU1_MEMBASE:
2164                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2165                         break;
2166                 case OP_LOADI1_MEMBASE:
2167                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2168                         break;
2169                 case OP_LOADU2_MEMBASE:
2170                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2171                         break;
2172                 case OP_LOADI2_MEMBASE:
2173                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2174                         break;
2175                 case CEE_CONV_I1:
2176                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2177                         break;
2178                 case CEE_CONV_I2:
2179                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2180                         break;
2181                 case CEE_CONV_U1:
2182                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2183                         break;
2184                 case CEE_CONV_U2:
2185                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2186                         break;
2187                 case OP_COMPARE:
2188                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2189                         break;
2190                 case OP_COMPARE_IMM:
2191                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2192                         break;
2193                 case OP_X86_COMPARE_MEMBASE_REG:
2194                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2195                         break;
2196                 case OP_X86_COMPARE_MEMBASE_IMM:
2197                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2198                         break;
2199                 case OP_X86_COMPARE_MEMBASE8_IMM:
2200                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2201                         break;
2202                 case OP_X86_COMPARE_REG_MEMBASE:
2203                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2204                         break;
2205                 case OP_X86_COMPARE_MEM_IMM:
2206                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2207                         break;
2208                 case OP_X86_TEST_NULL:
2209                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2210                         break;
2211                 case OP_X86_ADD_MEMBASE_IMM:
2212                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2213                         break;
2214                 case OP_X86_ADD_MEMBASE:
2215                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2216                         break;
2217                 case OP_X86_SUB_MEMBASE_IMM:
2218                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2219                         break;
2220                 case OP_X86_SUB_MEMBASE:
2221                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2222                         break;
2223                 case OP_X86_AND_MEMBASE_IMM:
2224                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2225                         break;
2226                 case OP_X86_OR_MEMBASE_IMM:
2227                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2228                         break;
2229                 case OP_X86_XOR_MEMBASE_IMM:
2230                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2231                         break;
2232                 case OP_X86_INC_MEMBASE:
2233                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2234                         break;
2235                 case OP_X86_INC_REG:
2236                         x86_inc_reg (code, ins->dreg);
2237                         break;
2238                 case OP_X86_DEC_MEMBASE:
2239                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2240                         break;
2241                 case OP_X86_DEC_REG:
2242                         x86_dec_reg (code, ins->dreg);
2243                         break;
2244                 case OP_X86_MUL_MEMBASE:
2245                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2246                         break;
2247                 case OP_BREAK:
2248                         x86_breakpoint (code);
2249                         break;
2250                 case OP_ADDCC:
2251                 case CEE_ADD:
2252                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2253                         break;
2254                 case OP_ADC:
2255                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2256                         break;
2257                 case OP_ADDCC_IMM:
2258                 case OP_ADD_IMM:
2259                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2260                         break;
2261                 case OP_ADC_IMM:
2262                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2263                         break;
2264                 case OP_SUBCC:
2265                 case CEE_SUB:
2266                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2267                         break;
2268                 case OP_SBB:
2269                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2270                         break;
2271                 case OP_SUBCC_IMM:
2272                 case OP_SUB_IMM:
2273                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2274                         break;
2275                 case OP_SBB_IMM:
2276                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2277                         break;
2278                 case CEE_AND:
2279                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2280                         break;
2281                 case OP_AND_IMM:
2282                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2283                         break;
2284                 case CEE_DIV:
2285                         x86_cdq (code);
2286                         x86_div_reg (code, ins->sreg2, TRUE);
2287                         break;
2288                 case CEE_DIV_UN:
2289                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2290                         x86_div_reg (code, ins->sreg2, FALSE);
2291                         break;
2292                 case OP_DIV_IMM:
2293                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2294                         x86_cdq (code);
2295                         x86_div_reg (code, ins->sreg2, TRUE);
2296                         break;
2297                 case CEE_REM:
2298                         x86_cdq (code);
2299                         x86_div_reg (code, ins->sreg2, TRUE);
2300                         break;
2301                 case CEE_REM_UN:
2302                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2303                         x86_div_reg (code, ins->sreg2, FALSE);
2304                         break;
2305                 case OP_REM_IMM:
2306                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2307                         x86_cdq (code);
2308                         x86_div_reg (code, ins->sreg2, TRUE);
2309                         break;
2310                 case CEE_OR:
2311                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2312                         break;
2313                 case OP_OR_IMM:
2314                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2315                         break;
2316                 case CEE_XOR:
2317                 case OP_IXOR:
2318                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2319                         break;
2320                 case OP_XOR_IMM:
2321                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2322                         break;
2323                 case CEE_SHL:
2324                         g_assert (ins->sreg2 == X86_ECX);
2325                         x86_shift_reg (code, X86_SHL, ins->dreg);
2326                         break;
2327                 case CEE_SHR:
2328                         g_assert (ins->sreg2 == X86_ECX);
2329                         x86_shift_reg (code, X86_SAR, ins->dreg);
2330                         break;
2331                 case OP_SHR_IMM:
2332                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2333                         break;
2334                 case OP_SHR_UN_IMM:
2335                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2336                         break;
2337                 case CEE_SHR_UN:
2338                         g_assert (ins->sreg2 == X86_ECX);
2339                         x86_shift_reg (code, X86_SHR, ins->dreg);
2340                         break;
2341                 case OP_SHL_IMM:
2342                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2343                         break;
2344                 case OP_LSHL: {
2345                         guint8 *jump_to_end;
2346
2347                         /* handle shifts below 32 bits */
2348                         x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2349                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2350
2351                         x86_test_reg_imm (code, X86_ECX, 32);
2352                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2353
2354                         /* handle shifts over 31 bits */
2355                         x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2356                         x86_clear_reg (code, ins->sreg1);
2357                         
2358                         x86_patch (jump_to_end, code);
2359                         }
2360                         break;
2361                 case OP_LSHR: {
2362                         guint8 *jump_to_end;
2363
2364                         /* handle shifts below 32 bits */
2365                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2366                         x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2367
2368                         x86_test_reg_imm (code, X86_ECX, 32);
2369                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2370
2371                         /* handle shifts over 31 bits */
2372                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2373                         x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2374                         
2375                         x86_patch (jump_to_end, code);
2376                         }
2377                         break;
2378                 case OP_LSHR_UN: {
2379                         guint8 *jump_to_end;
2380
2381                         /* handle shifts below 32 bits */
2382                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2383                         x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2384
2385                         x86_test_reg_imm (code, X86_ECX, 32);
2386                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2387
2388                         /* handle shifts over 31 bits */
2389                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2390                         x86_clear_reg (code, ins->backend.reg3);
2391                         
2392                         x86_patch (jump_to_end, code);
2393                         }
2394                         break;
2395                 case OP_LSHL_IMM:
2396                         if (ins->inst_imm >= 32) {
2397                                 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2398                                 x86_clear_reg (code, ins->sreg1);
2399                                 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2400                         } else {
2401                                 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2402                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2403                         }
2404                         break;
2405                 case OP_LSHR_IMM:
2406                         if (ins->inst_imm >= 32) {
2407                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
2408                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2409                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2410                         } else {
2411                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2412                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2413                         }
2414                         break;
2415                 case OP_LSHR_UN_IMM:
2416                         if (ins->inst_imm >= 32) {
2417                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2418                                 x86_clear_reg (code, ins->backend.reg3);
2419                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2420                         } else {
2421                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2422                                 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2423                         }
2424                         break;
2425                 case CEE_NOT:
2426                         x86_not_reg (code, ins->sreg1);
2427                         break;
2428                 case CEE_NEG:
2429                         x86_neg_reg (code, ins->sreg1);
2430                         break;
2431                 case OP_SEXT_I1:
2432                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2433                         break;
2434                 case OP_SEXT_I2:
2435                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2436                         break;
2437                 case CEE_MUL:
2438                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2439                         break;
2440                 case OP_MUL_IMM:
2441                         switch (ins->inst_imm) {
2442                         case 2:
2443                                 /* MOV r1, r2 */
2444                                 /* ADD r1, r1 */
2445                                 if (ins->dreg != ins->sreg1)
2446                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2447                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2448                                 break;
2449                         case 3:
2450                                 /* LEA r1, [r2 + r2*2] */
2451                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2452                                 break;
2453                         case 5:
2454                                 /* LEA r1, [r2 + r2*4] */
2455                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2456                                 break;
2457                         case 6:
2458                                 /* LEA r1, [r2 + r2*2] */
2459                                 /* ADD r1, r1          */
2460                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2461                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2462                                 break;
2463                         case 9:
2464                                 /* LEA r1, [r2 + r2*8] */
2465                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2466                                 break;
2467                         case 10:
2468                                 /* LEA r1, [r2 + r2*4] */
2469                                 /* ADD r1, r1          */
2470                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2471                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2472                                 break;
2473                         case 12:
2474                                 /* LEA r1, [r2 + r2*2] */
2475                                 /* SHL r1, 2           */
2476                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2477                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2478                                 break;
2479                         case 25:
2480                                 /* LEA r1, [r2 + r2*4] */
2481                                 /* LEA r1, [r1 + r1*4] */
2482                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2483                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2484                                 break;
2485                         case 100:
2486                                 /* LEA r1, [r2 + r2*4] */
2487                                 /* SHL r1, 2           */
2488                                 /* LEA r1, [r1 + r1*4] */
2489                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2490                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2491                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2492                                 break;
2493                         default:
2494                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2495                                 break;
2496                         }
2497                         break;
2498                 case CEE_MUL_OVF:
2499                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2500                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2501                         break;
2502                 case CEE_MUL_OVF_UN: {
2503                         /* the mul operation and the exception check should most likely be split */
2504                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2505                         /*g_assert (ins->sreg2 == X86_EAX);
2506                         g_assert (ins->dreg == X86_EAX);*/
2507                         if (ins->sreg2 == X86_EAX) {
2508                                 non_eax_reg = ins->sreg1;
2509                         } else if (ins->sreg1 == X86_EAX) {
2510                                 non_eax_reg = ins->sreg2;
2511                         } else {
2512                                 /* no need to save since we're going to store to it anyway */
2513                                 if (ins->dreg != X86_EAX) {
2514                                         saved_eax = TRUE;
2515                                         x86_push_reg (code, X86_EAX);
2516                                 }
2517                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2518                                 non_eax_reg = ins->sreg2;
2519                         }
2520                         if (ins->dreg == X86_EDX) {
2521                                 if (!saved_eax) {
2522                                         saved_eax = TRUE;
2523                                         x86_push_reg (code, X86_EAX);
2524                                 }
2525                         } else if (ins->dreg != X86_EAX) {
2526                                 saved_edx = TRUE;
2527                                 x86_push_reg (code, X86_EDX);
2528                         }
2529                         x86_mul_reg (code, non_eax_reg, FALSE);
2530                         /* save before the check since pop and mov don't change the flags */
2531                         if (ins->dreg != X86_EAX)
2532                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2533                         if (saved_edx)
2534                                 x86_pop_reg (code, X86_EDX);
2535                         if (saved_eax)
2536                                 x86_pop_reg (code, X86_EAX);
2537                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2538                         break;
2539                 }
2540                 case OP_ICONST:
2541                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2542                         break;
2543                 case OP_AOTCONST:
2544                         g_assert_not_reached ();
2545                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2546                         x86_mov_reg_imm (code, ins->dreg, 0);
2547                         break;
2548                 case OP_LOAD_GOTADDR:
2549                         x86_call_imm (code, 0);
2550                         /* 
2551                          * The patch needs to point to the pop, since the GOT offset needs 
2552                          * to be added to that address.
2553                          */
2554                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2555                         x86_pop_reg (code, ins->dreg);
2556                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2557                         break;
2558                 case OP_GOT_ENTRY:
2559                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2560                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2561                         break;
2562                 case OP_X86_PUSH_GOT_ENTRY:
2563                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2564                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2565                         break;
2566                 case CEE_CONV_I4:
2567                 case OP_MOVE:
2568                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2569                         break;
2570                 case CEE_CONV_U4:
2571                         g_assert_not_reached ();
2572                 case OP_JMP: {
2573                         /*
2574                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2575                          * Keep in sync with the code in emit_epilog.
2576                          */
2577                         int pos = 0;
2578
2579                         /* FIXME: no tracing support... */
2580                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2581                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2582                         /* reset offset to make max_len work */
2583                         offset = code - cfg->native_code;
2584
2585                         g_assert (!cfg->method->save_lmf);
2586
2587                         code = emit_load_volatile_arguments (cfg, code);
2588
2589                         if (cfg->used_int_regs & (1 << X86_EBX))
2590                                 pos -= 4;
2591                         if (cfg->used_int_regs & (1 << X86_EDI))
2592                                 pos -= 4;
2593                         if (cfg->used_int_regs & (1 << X86_ESI))
2594                                 pos -= 4;
2595                         if (pos)
2596                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2597         
2598                         if (cfg->used_int_regs & (1 << X86_ESI))
2599                                 x86_pop_reg (code, X86_ESI);
2600                         if (cfg->used_int_regs & (1 << X86_EDI))
2601                                 x86_pop_reg (code, X86_EDI);
2602                         if (cfg->used_int_regs & (1 << X86_EBX))
2603                                 x86_pop_reg (code, X86_EBX);
2604         
2605                         /* restore ESP/EBP */
2606                         x86_leave (code);
2607                         offset = code - cfg->native_code;
2608                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2609                         x86_jump32 (code, 0);
2610                         break;
2611                 }
2612                 case OP_CHECK_THIS:
2613                         /* ensure ins->sreg1 is not NULL
2614                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2615                          * cmp DWORD PTR [eax], 0
2616                          */
2617                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2618                         break;
2619                 case OP_ARGLIST: {
2620                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2621                         x86_push_reg (code, hreg);
2622                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2623                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2624                         x86_pop_reg (code, hreg);
2625                         break;
2626                 }
2627                 case OP_FCALL:
2628                 case OP_LCALL:
2629                 case OP_VCALL:
2630                 case OP_VOIDCALL:
2631                 case CEE_CALL:
2632                         call = (MonoCallInst*)ins;
2633                         if (ins->flags & MONO_INST_HAS_METHOD)
2634                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2635                         else
2636                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2637                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2638                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2639                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2640                                  * for P4 or i686 because gcc will avoid using pop push at all). But we aren't
2641                                  * smart enough to do that optimization yet.
2642                                  *
2643                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2644                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small speedup
2645                                  * (most likely from locality benefits). People with other processors should
2646                                  * check on theirs to see what happens.
2647                                  */
2648                                 if (call->stack_usage == 4) {
2649                                         /* we want to use registers that won't get used soon, so use
2650                                          * ecx, as eax will get allocated first. edx is used by long calls,
2651                                          * so we can't use that.
2652                                          */
2653                                         
2654                                         x86_pop_reg (code, X86_ECX);
2655                                 } else {
2656                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2657                                 }
2658                         }
2659                         code = emit_move_return_value (cfg, ins, code);
2660                         break;
2661                 case OP_FCALL_REG:
2662                 case OP_LCALL_REG:
2663                 case OP_VCALL_REG:
2664                 case OP_VOIDCALL_REG:
2665                 case OP_CALL_REG:
2666                         call = (MonoCallInst*)ins;
2667                         x86_call_reg (code, ins->sreg1);
2668                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2669                                 if (call->stack_usage == 4)
2670                                         x86_pop_reg (code, X86_ECX);
2671                                 else
2672                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2673                         }
2674                         code = emit_move_return_value (cfg, ins, code);
2675                         break;
2676                 case OP_FCALL_MEMBASE:
2677                 case OP_LCALL_MEMBASE:
2678                 case OP_VCALL_MEMBASE:
2679                 case OP_VOIDCALL_MEMBASE:
2680                 case OP_CALL_MEMBASE:
2681                         call = (MonoCallInst*)ins;
2682                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2683                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2684                                 if (call->stack_usage == 4)
2685                                         x86_pop_reg (code, X86_ECX);
2686                                 else
2687                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2688                         }
2689                         code = emit_move_return_value (cfg, ins, code);
2690                         break;
2691                 case OP_OUTARG:
2692                 case OP_X86_PUSH:
2693                         x86_push_reg (code, ins->sreg1);
2694                         break;
2695                 case OP_X86_PUSH_IMM:
2696                         x86_push_imm (code, ins->inst_imm);
2697                         break;
2698                 case OP_X86_PUSH_MEMBASE:
2699                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2700                         break;
2701                 case OP_X86_PUSH_OBJ: 
2702                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2703                         x86_push_reg (code, X86_EDI);
2704                         x86_push_reg (code, X86_ESI);
2705                         x86_push_reg (code, X86_ECX);
2706                         if (ins->inst_offset)
2707                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2708                         else
2709                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2710                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2711                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2712                         x86_cld (code);
2713                         x86_prefix (code, X86_REP_PREFIX);
2714                         x86_movsd (code);
2715                         x86_pop_reg (code, X86_ECX);
2716                         x86_pop_reg (code, X86_ESI);
2717                         x86_pop_reg (code, X86_EDI);
2718                         break;
2719                 case OP_X86_LEA:
2720                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2721                         break;
2722                 case OP_X86_LEA_MEMBASE:
2723                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2724                         break;
2725                 case OP_X86_XCHG:
2726                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2727                         break;
2728                 case OP_LOCALLOC:
2729                         /* keep alignment */
2730                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2731                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
2732                         code = mono_emit_stack_alloc (code, ins);
2733                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2734                         break;
2735                 case CEE_RET:
2736                         x86_ret (code);
2737                         break;
2738                 case OP_THROW: {
2739                         x86_push_reg (code, ins->sreg1);
2740                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2741                                                           (gpointer)"mono_arch_throw_exception");
2742                         break;
2743                 }
2744                 case OP_RETHROW: {
2745                         x86_push_reg (code, ins->sreg1);
2746                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2747                                                           (gpointer)"mono_arch_rethrow_exception");
2748                         break;
2749                 }
2750                 case OP_CALL_HANDLER: 
2751                         /* Align stack */
2752 #ifdef __APPLE__
2753                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2754 #endif
2755                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2756                         x86_call_imm (code, 0);
2757 #ifdef __APPLE__
2758                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2759 #endif
2760                         break;
2761                 case OP_LABEL:
2762                         ins->inst_c0 = code - cfg->native_code;
2763                         break;
2764                 case OP_BR:
2765                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2766                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2767                         //break;
2768                         if (ins->flags & MONO_INST_BRLABEL) {
2769                                 if (ins->inst_i0->inst_c0) {
2770                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2771                                 } else {
2772                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2773                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2774                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2775                                                 x86_jump8 (code, 0);
2776                                         else 
2777                                                 x86_jump32 (code, 0);
2778                                 }
2779                         } else {
2780                                 if (ins->inst_target_bb->native_offset) {
2781                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2782                                 } else {
2783                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2784                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2785                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2786                                                 x86_jump8 (code, 0);
2787                                         else 
2788                                                 x86_jump32 (code, 0);
2789                                 } 
2790                         }
2791                         break;
2792                 case OP_BR_REG:
2793                         x86_jump_reg (code, ins->sreg1);
2794                         break;
2795                 case OP_CEQ:
2796                 case OP_CLT:
2797                 case OP_CLT_UN:
2798                 case OP_CGT:
2799                 case OP_CGT_UN:
2800                 case OP_CNE:
2801                         x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2802                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2803                         break;
2804                 case OP_COND_EXC_EQ:
2805                 case OP_COND_EXC_NE_UN:
2806                 case OP_COND_EXC_LT:
2807                 case OP_COND_EXC_LT_UN:
2808                 case OP_COND_EXC_GT:
2809                 case OP_COND_EXC_GT_UN:
2810                 case OP_COND_EXC_GE:
2811                 case OP_COND_EXC_GE_UN:
2812                 case OP_COND_EXC_LE:
2813                 case OP_COND_EXC_LE_UN:
2814                         EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
2815                         break;
2816                 case OP_COND_EXC_OV:
2817                 case OP_COND_EXC_NO:
2818                 case OP_COND_EXC_C:
2819                 case OP_COND_EXC_NC:
2820                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2821                         break;
2822                 case CEE_BEQ:
2823                 case CEE_BNE_UN:
2824                 case CEE_BLT:
2825                 case CEE_BLT_UN:
2826                 case CEE_BGT:
2827                 case CEE_BGT_UN:
2828                 case CEE_BGE:
2829                 case CEE_BGE_UN:
2830                 case CEE_BLE:
2831                 case CEE_BLE_UN:
2832                         EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2833                         break;
2834
2835                 /* floating point opcodes */
2836                 case OP_R8CONST: {
2837                         double d = *(double *)ins->inst_p0;
2838
2839                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2840                                 x86_fldz (code);
2841                         } else if (d == 1.0) {
2842                                 x86_fld1 (code);
2843                         } else {
2844                                 if (cfg->compile_aot) {
2845                                         guint32 *val = (guint32*)&d;
2846                                         x86_push_imm (code, val [1]);
2847                                         x86_push_imm (code, val [0]);
2848                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2849                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2850                                 }
2851                                 else {
2852                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2853                                         x86_fld (code, NULL, TRUE);
2854                                 }
2855                         }
2856                         break;
2857                 }
2858                 case OP_R4CONST: {
2859                         float f = *(float *)ins->inst_p0;
2860
2861                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2862                                 x86_fldz (code);
2863                         } else if (f == 1.0) {
2864                                 x86_fld1 (code);
2865                         } else {
2866                                 if (cfg->compile_aot) {
2867                                         guint32 val = *(guint32*)&f;
2868                                         x86_push_imm (code, val);
2869                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2870                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2871                                 }
2872                                 else {
2873                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2874                                         x86_fld (code, NULL, FALSE);
2875                                 }
2876                         }
2877                         break;
2878                 }
2879                 case OP_STORER8_MEMBASE_REG:
2880                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2881                         break;
2882                 case OP_LOADR8_SPILL_MEMBASE:
2883                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2884                         x86_fxch (code, 1);
2885                         break;
2886                 case OP_LOADR8_MEMBASE:
2887                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2888                         break;
2889                 case OP_STORER4_MEMBASE_REG:
2890                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2891                         break;
2892                 case OP_LOADR4_MEMBASE:
2893                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2894                         break;
2895                 case CEE_CONV_R4: /* FIXME: change precision */
2896                 case CEE_CONV_R8:
2897                         x86_push_reg (code, ins->sreg1);
2898                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2899                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2900                         break;
2901                 case OP_X86_FP_LOAD_I8:
2902                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2903                         break;
2904                 case OP_X86_FP_LOAD_I4:
2905                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2906                         break;
2907                 case OP_FCONV_TO_I1:
2908                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2909                         break;
2910                 case OP_FCONV_TO_U1:
2911                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2912                         break;
2913                 case OP_FCONV_TO_I2:
2914                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2915                         break;
2916                 case OP_FCONV_TO_U2:
2917                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2918                         break;
2919                 case OP_FCONV_TO_I4:
2920                 case OP_FCONV_TO_I:
2921                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2922                         break;
2923                 case OP_FCONV_TO_I8:
2924                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2925                         x86_fnstcw_membase(code, X86_ESP, 0);
2926                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2927                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2928                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2929                         x86_fldcw_membase (code, X86_ESP, 2);
2930                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2931                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2932                         x86_pop_reg (code, ins->dreg);
2933                         x86_pop_reg (code, ins->backend.reg3);
2934                         x86_fldcw_membase (code, X86_ESP, 0);
2935                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2936                         break;
2937                 case OP_LCONV_TO_R_UN: { 
2938                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2939                         guint8 *br;
2940
2941                         /* load 64bit integer to FP stack */
2942                         x86_push_imm (code, 0);
2943                         x86_push_reg (code, ins->sreg2);
2944                         x86_push_reg (code, ins->sreg1);
2945                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2946                         /* store as 80bit FP value */
2947                         x86_fst80_membase (code, X86_ESP, 0);
2948                         
2949                         /* test if lreg is negative */
2950                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2951                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2952         
2953                         /* add correction constant mn */
2954                         x86_fld80_mem (code, mn);
2955                         x86_fld80_membase (code, X86_ESP, 0);
2956                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2957                         x86_fst80_membase (code, X86_ESP, 0);
2958
2959                         x86_patch (br, code);
2960
2961                         x86_fld80_membase (code, X86_ESP, 0);
2962                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2963
2964                         break;
2965                 }
2966                 case OP_LCONV_TO_OVF_I: {
2967                         guint8 *br [3], *label [1];
2968                         MonoInst *tins;
2969
2970                         /* 
2971                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2972                          */
2973                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2974
2975                         /* If the low word top bit is set, see if we are negative */
2976                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2977                         /* We are not negative (no top bit set), check for our top word to be zero */
2978                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2979                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2980                         label [0] = code;
2981
2982                         /* throw exception */
2983                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
2984                         if (tins) {
2985                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
2986                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
2987                                         x86_jump8 (code, 0);
2988                                 else
2989                                         x86_jump32 (code, 0);
2990                         } else {
2991                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2992                                 x86_jump32 (code, 0);
2993                         }
2994         
2995         
2996                         x86_patch (br [0], code);
2997                         /* our top bit is set, check that top word is 0xffffffff */
2998                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2999                 
3000                         x86_patch (br [1], code);
3001                         /* nope, emit exception */
3002                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3003                         x86_patch (br [2], label [0]);
3004
3005                         if (ins->dreg != ins->sreg1)
3006                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3007                         break;
3008                 }
3009                 case OP_FADD:
3010                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3011                         break;
3012                 case OP_FSUB:
3013                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3014                         break;          
3015                 case OP_FMUL:
3016                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3017                         break;          
3018                 case OP_FDIV:
3019                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3020                         break;          
3021                 case OP_FNEG:
3022                         x86_fchs (code);
3023                         break;          
3024                 case OP_SIN:
3025                         x86_fsin (code);
3026                         x86_fldz (code);
3027                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3028                         break;          
3029                 case OP_COS:
3030                         x86_fcos (code);
3031                         x86_fldz (code);
3032                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3033                         break;          
3034                 case OP_ABS:
3035                         x86_fabs (code);
3036                         break;          
3037                 case OP_TAN: {
3038                         /* 
3039                          * it really doesn't make sense to inline all this code,
3040                          * it's here just to show that things may not be as simple 
3041                          * as they appear.
3042                          */
3043                         guchar *check_pos, *end_tan, *pop_jump;
3044                         x86_push_reg (code, X86_EAX);
3045                         x86_fptan (code);
3046                         x86_fnstsw (code);
3047                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3048                         check_pos = code;
3049                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3050                         x86_fstp (code, 0); /* pop the 1.0 */
3051                         end_tan = code;
3052                         x86_jump8 (code, 0);
3053                         x86_fldpi (code);
3054                         x86_fp_op (code, X86_FADD, 0);
3055                         x86_fxch (code, 1);
3056                         x86_fprem1 (code);
3057                         x86_fstsw (code);
3058                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3059                         pop_jump = code;
3060                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3061                         x86_fstp (code, 1);
3062                         x86_fptan (code);
3063                         x86_patch (pop_jump, code);
3064                         x86_fstp (code, 0); /* pop the 1.0 */
3065                         x86_patch (check_pos, code);
3066                         x86_patch (end_tan, code);
3067                         x86_fldz (code);
3068                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3069                         x86_pop_reg (code, X86_EAX);
3070                         break;
3071                 }
3072                 case OP_ATAN:
3073                         x86_fld1 (code);
3074                         x86_fpatan (code);
3075                         x86_fldz (code);
3076                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3077                         break;          
3078                 case OP_SQRT:
3079                         x86_fsqrt (code);
3080                         break;          
3081                 case OP_X86_FPOP:
3082                         x86_fstp (code, 0);
3083                         break;          
3084                 case OP_FREM: {
3085                         guint8 *l1, *l2;
3086
3087                         x86_push_reg (code, X86_EAX);
3088                         /* we need to exchange ST(0) with ST(1) */
3089                         x86_fxch (code, 1);
3090
3091                         /* this requires a loop, because fprem sometimes
3092                          * returns a partial remainder */
3093                         l1 = code;
3094                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3095                         /* x86_fprem1 (code); */
3096                         x86_fprem (code);
3097                         x86_fnstsw (code);
3098                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3099                         l2 = code + 2;
3100                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3101
3102                         /* pop result */
3103                         x86_fstp (code, 1);
3104
3105                         x86_pop_reg (code, X86_EAX);
3106                         break;
3107                 }
3108                 case OP_FCOMPARE:
3109                         if (cfg->opt & MONO_OPT_FCMOV) {
3110                                 x86_fcomip (code, 1);
3111                                 x86_fstp (code, 0);
3112                                 break;
3113                         }
3114                         /* this overwrites EAX */
3115                         EMIT_FPCOMPARE(code);
3116                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3117                         break;
3118                 case OP_FCEQ:
3119                         if (cfg->opt & MONO_OPT_FCMOV) {
3120                                 /* zeroing the register at the start results in 
3121                                  * shorter and faster code (we can also remove the widening op)
3122                                  */
3123                                 guchar *unordered_check;
3124                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3125                                 x86_fcomip (code, 1);
3126                                 x86_fstp (code, 0);
3127                                 unordered_check = code;
3128                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3129                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3130                                 x86_patch (unordered_check, code);
3131                                 break;
3132                         }
3133                         if (ins->dreg != X86_EAX) 
3134                                 x86_push_reg (code, X86_EAX);
3135
3136                         EMIT_FPCOMPARE(code);
3137                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3138                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3139                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3140                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3141
3142                         if (ins->dreg != X86_EAX) 
3143                                 x86_pop_reg (code, X86_EAX);
3144                         break;
3145                 case OP_FCLT:
3146                 case OP_FCLT_UN:
3147                         if (cfg->opt & MONO_OPT_FCMOV) {
3148                                 /* zeroing the register at the start results in 
3149                                  * shorter and faster code (we can also remove the widening op)
3150                                  */
3151                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3152                                 x86_fcomip (code, 1);
3153                                 x86_fstp (code, 0);
3154                                 if (ins->opcode == OP_FCLT_UN) {
3155                                         guchar *unordered_check = code;
3156                                         guchar *jump_to_end;
3157                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3158                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3159                                         jump_to_end = code;
3160                                         x86_jump8 (code, 0);
3161                                         x86_patch (unordered_check, code);
3162                                         x86_inc_reg (code, ins->dreg);
3163                                         x86_patch (jump_to_end, code);
3164                                 } else {
3165                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3166                                 }
3167                                 break;
3168                         }
3169                         if (ins->dreg != X86_EAX) 
3170                                 x86_push_reg (code, X86_EAX);
3171
3172                         EMIT_FPCOMPARE(code);
3173                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3174                         if (ins->opcode == OP_FCLT_UN) {
3175                                 guchar *is_not_zero_check, *end_jump;
3176                                 is_not_zero_check = code;
3177                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3178                                 end_jump = code;
3179                                 x86_jump8 (code, 0);
3180                                 x86_patch (is_not_zero_check, code);
3181                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3182
3183                                 x86_patch (end_jump, code);
3184                         }
3185                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3186                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3187
3188                         if (ins->dreg != X86_EAX) 
3189                                 x86_pop_reg (code, X86_EAX);
3190                         break;
3191                 case OP_FCGT:
3192                 case OP_FCGT_UN:
3193                         if (cfg->opt & MONO_OPT_FCMOV) {
3194                                 /* zeroing the register at the start results in 
3195                                  * shorter and faster code (we can also remove the widening op)
3196                                  */
3197                                 guchar *unordered_check;
3198                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3199                                 x86_fcomip (code, 1);
3200                                 x86_fstp (code, 0);
3201                                 if (ins->opcode == OP_FCGT) {
3202                                         unordered_check = code;
3203                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3204                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3205                                         x86_patch (unordered_check, code);
3206                                 } else {
3207                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3208                                 }
3209                                 break;
3210                         }
3211                         if (ins->dreg != X86_EAX) 
3212                                 x86_push_reg (code, X86_EAX);
3213
3214                         EMIT_FPCOMPARE(code);
3215                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3216                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3217                         if (ins->opcode == OP_FCGT_UN) {
3218                                 guchar *is_not_zero_check, *end_jump;
3219                                 is_not_zero_check = code;
3220                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3221                                 end_jump = code;
3222                                 x86_jump8 (code, 0);
3223                                 x86_patch (is_not_zero_check, code);
3224                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3225         
3226                                 x86_patch (end_jump, code);
3227                         }
3228                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3229                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3230
3231                         if (ins->dreg != X86_EAX) 
3232                                 x86_pop_reg (code, X86_EAX);
3233                         break;
3234                 case OP_FBEQ:
3235                         if (cfg->opt & MONO_OPT_FCMOV) {
3236                                 guchar *jump = code;
3237                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3238                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3239                                 x86_patch (jump, code);
3240                                 break;
3241                         }
3242                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3243                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3244                         break;
3245                 case OP_FBNE_UN:
3246                         /* Branch if C013 != 100 */
3247                         if (cfg->opt & MONO_OPT_FCMOV) {
3248                                 /* branch if !ZF or (PF|CF) */
3249                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3250                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3251                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3252                                 break;
3253                         }
3254                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3255                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3256                         break;
3257                 case OP_FBLT:
3258                         if (cfg->opt & MONO_OPT_FCMOV) {
3259                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3260                                 break;
3261                         }
3262                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3263                         break;
3264                 case OP_FBLT_UN:
3265                         if (cfg->opt & MONO_OPT_FCMOV) {
3266                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3267                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3268                                 break;
3269                         }
3270                         if (ins->opcode == OP_FBLT_UN) {
3271                                 guchar *is_not_zero_check, *end_jump;
3272                                 is_not_zero_check = code;
3273                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3274                                 end_jump = code;
3275                                 x86_jump8 (code, 0);
3276                                 x86_patch (is_not_zero_check, code);
3277                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3278
3279                                 x86_patch (end_jump, code);
3280                         }
3281                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3282                         break;
3283                 case OP_FBGT:
3284                 case OP_FBGT_UN:
3285                         if (cfg->opt & MONO_OPT_FCMOV) {
3286                                 if (ins->opcode == OP_FBGT) {
3287                                         guchar *br1;
3288
3289                                         /* skip branch if C1=1 */
3290                                         br1 = code;
3291                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3292                                         /* branch if (C0 | C3) = 1 */
3293                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3294                                         x86_patch (br1, code);
3295                                 } else {
3296                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3297                                 }
3298                                 break;
3299                         }
3300                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3301                         if (ins->opcode == OP_FBGT_UN) {
3302                                 guchar *is_not_zero_check, *end_jump;
3303                                 is_not_zero_check = code;
3304                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3305                                 end_jump = code;
3306                                 x86_jump8 (code, 0);
3307                                 x86_patch (is_not_zero_check, code);
3308                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3309
3310                                 x86_patch (end_jump, code);
3311                         }
3312                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3313                         break;
3314                 case OP_FBGE:
3315                         /* Branch if C013 == 100 or 001 */
3316                         if (cfg->opt & MONO_OPT_FCMOV) {
3317                                 guchar *br1;
3318
3319                                 /* skip branch if C1=1 */
3320                                 br1 = code;
3321                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3322                                 /* branch if (C0 | C3) = 1 */
3323                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3324                                 x86_patch (br1, code);
3325                                 break;
3326                         }
3327                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3328                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3329                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3330                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3331                         break;
3332                 case OP_FBGE_UN:
3333                         /* Branch if C013 == 000 */
3334                         if (cfg->opt & MONO_OPT_FCMOV) {
3335                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3336                                 break;
3337                         }
3338                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3339                         break;
3340                 case OP_FBLE:
3341                         /* Branch if C013=000 or 100 */
3342                         if (cfg->opt & MONO_OPT_FCMOV) {
3343                                 guchar *br1;
3344
3345                                 /* skip branch if C1=1 */
3346                                 br1 = code;
3347                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3348                                 /* branch if C0=0 */
3349                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3350                                 x86_patch (br1, code);
3351                                 break;
3352                         }
3353                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3354                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3355                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3356                         break;
3357                 case OP_FBLE_UN:
3358                         /* Branch if C013 != 001 */
3359                         if (cfg->opt & MONO_OPT_FCMOV) {
3360                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3361                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3362                                 break;
3363                         }
3364                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3365                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3366                         break;
3367                 case OP_CKFINITE: {
3368                         x86_push_reg (code, X86_EAX);
3369                         x86_fxam (code);
3370                         x86_fnstsw (code);
3371                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3372                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3373                         x86_pop_reg (code, X86_EAX);
3374                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3375                         break;
3376                 }
3377                 case OP_TLS_GET: {
3378                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3379                         break;
3380                 }
3381                 case OP_MEMORY_BARRIER: {
3382                         /* Not needed on x86 */
3383                         break;
3384                 }
3385                 case OP_ATOMIC_ADD_I4: {
3386                         int dreg = ins->dreg;
3387
3388                         if (dreg == ins->inst_basereg) {
3389                                 x86_push_reg (code, ins->sreg2);
3390                                 dreg = ins->sreg2;
3391                         } 
3392                         
3393                         if (dreg != ins->sreg2)
3394                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3395
3396                         x86_prefix (code, X86_LOCK_PREFIX);
3397                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3398
3399                         if (dreg != ins->dreg) {
3400                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3401                                 x86_pop_reg (code, dreg);
3402                         }
3403
3404                         break;
3405                 }
3406                 case OP_ATOMIC_ADD_NEW_I4: {
3407                         int dreg = ins->dreg;
3408
3409                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3410                         if (ins->sreg2 == dreg) {
3411                                 if (dreg == X86_EBX) {
3412                                         dreg = X86_EDI;
3413                                         if (ins->inst_basereg == X86_EDI)
3414                                                 dreg = X86_ESI;
3415                                 } else {
3416                                         dreg = X86_EBX;
3417                                         if (ins->inst_basereg == X86_EBX)
3418                                                 dreg = X86_EDI;
3419                                 }
3420                         } else if (ins->inst_basereg == dreg) {
3421                                 if (dreg == X86_EBX) {
3422                                         dreg = X86_EDI;
3423                                         if (ins->sreg2 == X86_EDI)
3424                                                 dreg = X86_ESI;
3425                                 } else {
3426                                         dreg = X86_EBX;
3427                                         if (ins->sreg2 == X86_EBX)
3428                                                 dreg = X86_EDI;
3429                                 }
3430                         }
3431
3432                         if (dreg != ins->dreg) {
3433                                 x86_push_reg (code, dreg);
3434                         }
3435
3436                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3437                         x86_prefix (code, X86_LOCK_PREFIX);
3438                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3439                         /* dreg contains the old value, add with sreg2 value */
3440                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3441                         
3442                         if (ins->dreg != dreg) {
3443                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3444                                 x86_pop_reg (code, dreg);
3445                         }
3446
3447                         break;
3448                 }
3449                 case OP_ATOMIC_EXCHANGE_I4: {
3450                         guchar *br[2];
3451                         int sreg2 = ins->sreg2;
3452                         int breg = ins->inst_basereg;
3453
3454                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3455                          * hack to overcome limits in x86 reg allocator 
3456                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3457                          */
3458                         if (ins->dreg != X86_EAX)
3459                                 x86_push_reg (code, X86_EAX);
3460                         
3461                         /* We need the EAX reg for the cmpxchg */
3462                         if (ins->sreg2 == X86_EAX) {
3463                                 x86_push_reg (code, X86_EDX);
3464                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3465                                 sreg2 = X86_EDX;
3466                         }
3467
3468                         if (breg == X86_EAX) {
3469                                 x86_push_reg (code, X86_ESI);
3470                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3471                                 breg = X86_ESI;
3472                         }
3473
3474                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3475
3476                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3477                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3478                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3479                         x86_patch (br [1], br [0]);
3480
3481                         if (breg != ins->inst_basereg)
3482                                 x86_pop_reg (code, X86_ESI);
3483
3484                         if (ins->dreg != X86_EAX) {
3485                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3486                                 x86_pop_reg (code, X86_EAX);
3487                         }
3488
3489                         if (ins->sreg2 != sreg2)
3490                                 x86_pop_reg (code, X86_EDX);
3491
3492                         break;
3493                 }
3494                 default:
3495                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3496                         g_assert_not_reached ();
3497                 }
3498
3499                 if ((code - cfg->native_code - offset) > max_len) {
3500                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3501                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3502                         g_assert_not_reached ();
3503                 }
3504                
3505                 cpos += max_len;
3506
3507                 last_ins = ins;
3508                 last_offset = offset;
3509                 
3510                 ins = ins->next;
3511         }
3512
3513         cfg->code_len = code - cfg->native_code;
3514 }
3515
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Takes no arguments and returns nothing.  The x86 backend has nothing to
 * do here, so the body is empty.
 */
void
mono_arch_register_lowlevel_calls (void)
{
        /* Intentionally empty. */
}
3520
3521 void
3522 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3523 {
3524         MonoJumpInfo *patch_info;
3525         gboolean compile_aot = !run_cctors;
3526
3527         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3528                 unsigned char *ip = patch_info->ip.i + code;
3529                 const unsigned char *target;
3530
3531                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3532
3533                 if (compile_aot) {
3534                         switch (patch_info->type) {
3535                         case MONO_PATCH_INFO_BB:
3536                         case MONO_PATCH_INFO_LABEL:
3537                                 break;
3538                         default:
3539                                 /* No need to patch these */
3540                                 continue;
3541                         }
3542                 }
3543
3544                 switch (patch_info->type) {
3545                 case MONO_PATCH_INFO_IP:
3546                         *((gconstpointer *)(ip)) = target;
3547                         break;
3548                 case MONO_PATCH_INFO_CLASS_INIT: {
3549                         guint8 *code = ip;
3550                         /* Might already been changed to a nop */
3551                         x86_call_code (code, 0);
3552                         x86_patch (ip, target);
3553                         break;
3554                 }
3555                 case MONO_PATCH_INFO_ABS:
3556                 case MONO_PATCH_INFO_METHOD:
3557                 case MONO_PATCH_INFO_METHOD_JUMP:
3558                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3559                 case MONO_PATCH_INFO_BB:
3560                 case MONO_PATCH_INFO_LABEL:
3561                         x86_patch (ip, target);
3562                         break;
3563                 case MONO_PATCH_INFO_NONE:
3564                         break;
3565                 default: {
3566                         guint32 offset = mono_arch_get_patch_offset (ip);
3567                         *((gconstpointer *)(ip + offset)) = target;
3568                         break;
3569                 }
3570                 }
3571         }
3572 }
3573
3574 guint8 *
3575 mono_arch_emit_prolog (MonoCompile *cfg)
3576 {
3577         MonoMethod *method = cfg->method;
3578         MonoBasicBlock *bb;
3579         MonoMethodSignature *sig;
3580         MonoInst *inst;
3581         int alloc_size, pos, max_offset, i;
3582         guint8 *code;
3583
3584         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3585
3586         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3587                 cfg->code_size += 512;
3588
3589         code = cfg->native_code = g_malloc (cfg->code_size);
3590
3591         x86_push_reg (code, X86_EBP);
3592         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3593
3594         alloc_size = cfg->stack_offset;
3595         pos = 0;
3596
3597         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3598                 /* Might need to attach the thread to the JIT */
3599                 if (lmf_tls_offset != -1) {
3600                         guint8 *buf;
3601
3602                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3603                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3604                         buf = code;
3605                         x86_branch8 (code, X86_CC_NE, 0, 0);
3606                         x86_push_imm (code, cfg->domain);
3607                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3608                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3609                         x86_patch (buf, code);
3610 #ifdef PLATFORM_WIN32
3611                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3612                         /* FIXME: Add a separate key for LMF to avoid this */
3613                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3614 #endif
3615                 } else {
3616                         g_assert (!cfg->compile_aot);
3617                         x86_push_imm (code, cfg->domain);
3618                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3619                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3620                 }
3621         }
3622
3623         if (method->save_lmf) {
3624                 pos += sizeof (MonoLMF);
3625
3626                 /* save the current IP */
3627                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3628                 x86_push_imm_template (code);
3629
3630                 /* save all caller saved regs */
3631                 x86_push_reg (code, X86_EBP);
3632                 x86_push_reg (code, X86_ESI);
3633                 x86_push_reg (code, X86_EDI);
3634                 x86_push_reg (code, X86_EBX);
3635
3636                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3637                         /*
3638                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3639                          * through the mono_lmf_addr TLS variable.
3640                          */
3641                         /* %eax = previous_lmf */
3642                         x86_prefix (code, X86_GS_PREFIX);
3643                         x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
3644                         /* skip esp + method_info + lmf */
3645                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
3646                         /* push previous_lmf */
3647                         x86_push_reg (code, X86_EAX);
3648                         /* new lmf = ESP */
3649                         x86_prefix (code, X86_GS_PREFIX);
3650                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
3651                 } else {
3652                         /* get the address of lmf for the current thread */
3653                         /* 
3654                          * This is performance critical so we try to use some tricks to make
3655                          * it fast.
3656                          */                                                                        
3657
3658                         if (lmf_addr_tls_offset != -1) {
3659                                 /* Load lmf quicky using the GS register */
3660                                 code = emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
3661 #ifdef PLATFORM_WIN32
3662                                 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3663                                 /* FIXME: Add a separate key for LMF to avoid this */
3664                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3665 #endif
3666                         } else {
3667                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3668                         }
3669
3670                         /* Skip esp + method info */
3671                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3672
3673                         /* push lmf */
3674                         x86_push_reg (code, X86_EAX); 
3675                         /* push *lfm (previous_lmf) */
3676                         x86_push_membase (code, X86_EAX, 0);
3677                         /* *(lmf) = ESP */
3678                         x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3679                 }
3680         } else {
3681
3682                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3683                         x86_push_reg (code, X86_EBX);
3684                         pos += 4;
3685                 }
3686
3687                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3688                         x86_push_reg (code, X86_EDI);
3689                         pos += 4;
3690                 }
3691
3692                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3693                         x86_push_reg (code, X86_ESI);
3694                         pos += 4;
3695                 }
3696         }
3697
3698         alloc_size -= pos;
3699
3700 #if __APPLE__
3701         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3702         {
3703                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3704                 if (tot & 4) {
3705                         tot += 4;
3706                         alloc_size += 4;
3707                 }
3708                 if (tot & 8) {
3709                         alloc_size += 8;
3710                 }
3711         }
3712 #endif
3713
3714         if (alloc_size) {
3715                 /* See mono_emit_stack_alloc */
3716 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3717                 guint32 remaining_size = alloc_size;
3718                 while (remaining_size >= 0x1000) {
3719                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3720                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3721                         remaining_size -= 0x1000;
3722                 }
3723                 if (remaining_size)
3724                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3725 #else
3726                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3727 #endif
3728         }
3729
3730 #if __APPLE_
3731         /* check the stack is aligned */
3732         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3733         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3734         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3735         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3736         x86_breakpoint (code);
3737 #endif
3738
3739         /* compute max_offset in order to use short forward jumps */
3740         max_offset = 0;
3741         if (cfg->opt & MONO_OPT_BRANCH) {
3742                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3743                         MonoInst *ins = bb->code;
3744                         bb->max_offset = max_offset;
3745
3746                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3747                                 max_offset += 6;
3748                         /* max alignment for loops */
3749                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3750                                 max_offset += LOOP_ALIGNMENT;
3751
3752                         while (ins) {
3753                                 if (ins->opcode == OP_LABEL)
3754                                         ins->inst_c1 = max_offset;
3755                                 
3756                                 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
3757                                 ins = ins->next;
3758                         }
3759                 }
3760         }
3761
3762         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3763                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3764
3765         /* load arguments allocated to register from the stack */
3766         sig = mono_method_signature (method);
3767         pos = 0;
3768
3769         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3770                 inst = cfg->args [pos];
3771                 if (inst->opcode == OP_REGVAR) {
3772                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3773                         if (cfg->verbose_level > 2)
3774                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3775                 }
3776                 pos++;
3777         }
3778
3779         cfg->code_len = code - cfg->native_code;
3780
3781         return code;
3782 }
3783
3784 void
3785 mono_arch_emit_epilog (MonoCompile *cfg)
3786 {
3787         MonoMethod *method = cfg->method;
3788         MonoMethodSignature *sig = mono_method_signature (method);
3789         int quad, pos;
3790         guint32 stack_to_pop;
3791         guint8 *code;
3792         int max_epilog_size = 16;
3793         CallInfo *cinfo;
3794         
3795         if (cfg->method->save_lmf)
3796                 max_epilog_size += 128;
3797
3798         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
3799                 cfg->code_size *= 2;
3800                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3801                 mono_jit_stats.code_reallocs++;
3802         }
3803
3804         code = cfg->native_code + cfg->code_len;
3805
3806         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3807                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3808
3809         /* the code restoring the registers must be kept in sync with OP_JMP */
3810         pos = 0;
3811         
3812         if (method->save_lmf) {
3813                 gint32 prev_lmf_reg;
3814                 gint32 lmf_offset = -sizeof (MonoLMF);
3815
3816                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3817                         /*
3818                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3819                          * through the mono_lmf_addr TLS variable.
3820                          */
3821                         /* reg = previous_lmf */
3822                         x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3823
3824                         /* lmf = previous_lmf */
3825                         x86_prefix (code, X86_GS_PREFIX);
3826                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
3827                 } else {
3828                         /* Find a spare register */
3829                         switch (sig->ret->type) {
3830                         case MONO_TYPE_I8:
3831                         case MONO_TYPE_U8:
3832                                 prev_lmf_reg = X86_EDI;
3833                                 cfg->used_int_regs |= (1 << X86_EDI);
3834                                 break;
3835                         default:
3836                                 prev_lmf_reg = X86_EDX;
3837                                 break;
3838                         }
3839
3840                         /* reg = previous_lmf */
3841                         x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3842
3843                         /* ecx = lmf */
3844                         x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
3845
3846                         /* *(lmf) = previous_lmf */
3847                         x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
3848                 }
3849
3850                 /* restore caller saved regs */
3851                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3852                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
3853                 }
3854
3855                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3856                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
3857                 }
3858                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3859                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
3860                 }
3861
3862                 /* EBP is restored by LEAVE */
3863         } else {
3864                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3865                         pos -= 4;
3866                 }
3867                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3868                         pos -= 4;
3869                 }
3870                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3871                         pos -= 4;
3872                 }
3873
3874                 if (pos)
3875                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3876
3877                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3878                         x86_pop_reg (code, X86_ESI);
3879                 }
3880                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3881                         x86_pop_reg (code, X86_EDI);
3882                 }
3883                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3884                         x86_pop_reg (code, X86_EBX);
3885                 }
3886         }
3887
3888         /* Load returned vtypes into registers if needed */
3889         cinfo = get_call_info (cfg->mempool, sig, FALSE);
3890         if (cinfo->ret.storage == ArgValuetypeInReg) {
3891                 for (quad = 0; quad < 2; quad ++) {
3892                         switch (cinfo->ret.pair_storage [quad]) {
3893                         case ArgInIReg:
3894                                 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
3895                                 break;
3896                         case ArgOnFloatFpStack:
3897                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
3898                                 break;
3899                         case ArgOnDoubleFpStack:
3900                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
3901                                 break;
3902                         case ArgNone:
3903                                 break;
3904                         default:
3905                                 g_assert_not_reached ();
3906                         }
3907                 }
3908         }
3909
3910         x86_leave (code);
3911
3912         if (CALLCONV_IS_STDCALL (sig)) {
3913                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3914
3915                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
3916         } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
3917                 stack_to_pop = 4;
3918         else
3919                 stack_to_pop = 0;
3920
3921         if (stack_to_pop)
3922                 x86_ret_imm (code, stack_to_pop);
3923         else
3924                 x86_ret (code);
3925
3926         cfg->code_len = code - cfg->native_code;
3927
3928         g_assert (cfg->code_len < cfg->code_size);
3929 }
3930
/*
 * mono_arch_emit_exceptions:
 *
 *   Emit, after the code already present in cfg->native_code, the throw
 * sequence for every MONO_PATCH_INFO_EXC entry of cfg->patch_info and update
 * cfg->code_len. The native code buffer is grown first if the estimated size
 * does not fit.
 *   A full sequence pushes an offset derived from the ip of the patch, pushes
 * the type token of the System.<name> exception class (minus
 * MONO_TOKEN_TYPE_DEF) and emits a call whose patch entry is rewritten to
 * refer to mono_arch_throw_corlib_exception. The first 16 sequences are
 * remembered per exception class, so a later throw of the same class only
 * pushes its own offset and jumps into the remembered sequence.
 */
3931 void
3932 mono_arch_emit_exceptions (MonoCompile *cfg)
3933 {
3934         MonoJumpInfo *patch_info;
3935         int nthrows, i;
3936         guint8 *code;
             /* Remembered throw sequences, valid for indexes 0 .. nthrows - 1 */
3937         MonoClass *exc_classes [16];
3938         guint8 *exc_throw_start [16], *exc_throw_end [16];
3939         guint32 code_size;
3940         int exc_count = 0;
3941
3942         /* Compute needed space */
3943         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3944                 if (patch_info->type == MONO_PATCH_INFO_EXC)
3945                         exc_count++;
3946         }
3947
3948         /* 
3949          * make sure we have enough space for exceptions
3950          * 16 is the size of two push_imm instructions and a call
             * (32 bytes per exception are reserved when compiling AOT)
3951          */
3952         if (cfg->compile_aot)
3953                 code_size = exc_count * 32;
3954         else
3955                 code_size = exc_count * 16;
3956
             /* Double the buffer until the estimate fits with 16 bytes to spare */
3957         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
3958                 cfg->code_size *= 2;
3959                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3960                 mono_jit_stats.code_reallocs++;
3961         }
3962
             /* Emission continues right after the existing code */
3963         code = cfg->native_code + cfg->code_len;
3964
3965         nthrows = 0;
3966         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3967                 switch (patch_info->type) {
3968                 case MONO_PATCH_INFO_EXC: {
3969                         MonoClass *exc_class;
3970                         guint8 *buf, *buf2;
3971                         guint32 throw_ip;
3972
                             /* Make the instruction recorded at patch_info->ip target the code emitted below */
3973                         x86_patch (patch_info->ip.i + cfg->native_code, code);
3974
3975                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
3976                         g_assert (exc_class);
3977                         throw_ip = patch_info->ip.i;
3978
3979                         /* Find a throw sequence for the same exception class */
3980                         for (i = 0; i < nthrows; ++i)
3981                                 if (exc_classes [i] == exc_class)
3982                                         break;
3983                         if (i < nthrows) {
                                     /*
                                      * Reuse sequence i: push the offset computed against the end of that
                                      * sequence, then jump to the point just after its own offset push.
                                      * Nothing is left to patch for this entry.
                                      */
3984                                 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
3985                                 x86_jump_code (code, exc_throw_start [i]);
3986                                 patch_info->type = MONO_PATCH_INFO_NONE;
3987                         }
3988                         else {
3989                                 guint32 size;
3990
3991                                 /* Compute size of code following the push <OFFSET> */
3992                                 size = 5 + 5;
3993
                                     /*
                                      * The offset is only known once the call has been emitted, so a
                                      * placeholder push is emitted here and rewritten at buf below.
                                      * buf2 marks where the rewritten push has to end.
                                      */
3994                                 if ((code - cfg->native_code) - throw_ip < 126 - size) {
3995                                         /* Use the shorter form */
3996                                         buf = buf2 = code;
3997                                         x86_push_imm (code, 0);
3998                                 }
3999                                 else {
4000                                         buf = code;
4001                                         x86_push_imm (code, 0xf0f0f0f0);
4002                                         buf2 = code;
4003                                 }
4004
                                     /* Remember where the shareable part of the sequence starts */
4005                                 if (nthrows < 16) {
4006                                         exc_classes [nthrows] = exc_class;
4007                                         exc_throw_start [nthrows] = code;
4008                                 }
4009
4010                                 x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
                                     /* Turn this patch into one for the call emitted next */
4011                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
4012                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4013                                 patch_info->ip.i = code - cfg->native_code;
4014                                 x86_call_code (code, 0);
                                     /* Rewrite the placeholder with the real offset and pad what is left with nops */
4015                                 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
4016                                 while (buf < buf2)
4017                                         x86_nop (buf);
4018
4019                                 if (nthrows < 16) {
4020                                         exc_throw_end [nthrows] = code;
4021                                         nthrows ++;
4022                                 }
4023                         }
4024                         break;
4025                 }
4026                 default:
4027                         /* do nothing */
4028                         break;
4029                 }
4030         }
4031
4032         cfg->code_len = code - cfg->native_code;
4033
             /* Detect (after the fact) that more code was emitted than the buffer holds */
4034         g_assert (cfg->code_len < cfg->code_size);
4035 }
4036
/*
 * mono_arch_flush_icache:
 *
 *   Instruction cache flush hook for the range [code, code + size).
 * Both arguments are ignored: this backend does nothing here.
 */
4037 void
4038 mono_arch_flush_icache (guint8 *code, gint size)
4039 {
4040         /* not needed */
4041 }
4042
/*
 * mono_arch_flush_register_windows:
 *
 *   Empty on this backend; the function performs no work.
 */
4043 void
4044 mono_arch_flush_register_windows (void)
4045 {
4046 }
4047
4048 /*
4049  * Support for fast access to the thread-local lmf structure using the GS
4050  * segment register on NPTL + kernel 2.6.x.
4051  */
4052
/* Set once the TLS offsets below have been read (never set on PLATFORM_WIN32) */
4053 static gboolean tls_offset_inited = FALSE;
4054
/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Read the TLS keys/offsets of the appdomain, lmf, lmf address and current
 * thread into the file level *_tls_offset variables, unless the MONO_NO_TLS
 * environment variable is set, in which case they are left untouched.
 * On PLATFORM_WIN32 the keys are re-read on every call and keys >= 64 are
 * replaced by -1; elsewhere the offsets are read on the first call only.
 * The TLS argument is not used.
 */
4055 void
4056 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4057 {
4058         if (!tls_offset_inited) {
4059                 if (!getenv ("MONO_NO_TLS")) {
4060 #ifdef PLATFORM_WIN32
4061                         /* 
4062                          * We need to init this multiple times, since when we are first called, the key might not
4063                          * be initialized yet.
4064                          */
4065                         appdomain_tls_offset = mono_domain_get_tls_key ();
4066                         lmf_tls_offset = mono_get_jit_tls_key ();
4067                         thread_tls_offset = mono_thread_get_tls_key ();
4068
4069                         /* Only 64 tls entries can be accessed using inline code */
4070                         if (appdomain_tls_offset >= 64)
4071                                 appdomain_tls_offset = -1;
4072                         if (lmf_tls_offset >= 64)
4073                                 lmf_tls_offset = -1;
4074                         if (thread_tls_offset >= 64)
4075                                 thread_tls_offset = -1;
4076 #else
                             /*
                              * Use the same #ifdef test that guards the definition of the
                              * optimize_for_xen variable, so the assignment is compiled exactly
                              * when the variable exists.
                              */
4077 #ifdef MONO_XEN_OPT
4078                         optimize_for_xen = access ("/proc/xen", F_OK) == 0;
4079 #endif
4080                         tls_offset_inited = TRUE;
4081                         appdomain_tls_offset = mono_domain_get_tls_offset ();
4082                         lmf_tls_offset = mono_get_lmf_tls_offset ();
4083                         lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
4084                         thread_tls_offset = mono_thread_get_tls_offset ();
4085 #endif
4086                 }
4087         }               
4088 }
4089
/*
 * mono_arch_free_jit_tls_data:
 *
 *   Empty on this backend; TLS is ignored and nothing is released here.
 */
4090 void
4091 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4092 {
4093 }
4094
/*
 * mono_arch_emit_this_vret_args:
 *
 *   Append to cfg->cbb the instructions which pass the 'this' argument and the
 * valuetype return argument of the call INST, following the argument storage
 * computed by get_call_info () for inst->signature.
 *   THIS_REG is the vreg holding 'this' (with stack type THIS_TYPE), or -1 if
 * there is none. VT_REG is the vreg used for the valuetype return (presumably
 * the address of the return buffer - confirm against callers), or -1.
 */
4095 void
4096 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4097 {
4098         MonoCallInst *call = (MonoCallInst*)inst;
4099         CallInfo *cinfo = get_call_info (cfg->mempool, inst->signature, FALSE);
4100
4101         /* add the this argument */
4102         if (this_reg != -1) {
4103                 if (cinfo->args [0].storage == ArgInIReg) {
                             /* Copy 'this' into a new vreg and bind that vreg to the argument register */
4104                         MonoInst *this;
4105                         MONO_INST_NEW (cfg, this, OP_MOVE);
4106                         this->type = this_type;
4107                         this->sreg1 = this_reg;
4108                         this->dreg = mono_regstate_next_int (cfg->rs);
4109                         mono_bblock_add_inst (cfg->cbb, this);
4110
4111                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
4112                 }
4113                 else {
                             /* Not passed in a register: emit an OP_OUTARG for it */
4114                         MonoInst *this;
4115                         MONO_INST_NEW (cfg, this, OP_OUTARG);
4116                         this->type = this_type;
4117                         this->sreg1 = this_reg;
4118                         mono_bblock_add_inst (cfg->cbb, this);
4119                 }
4120         }
4121
4122         if (vt_reg != -1) {
4123                 MonoInst *vtarg;
4124
4125                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4126                         /*
4127                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4128                          * the stack. Save the address here, so the call instruction can
4129                          * access it.
                              * The address is stored at ESP + inst->stack_usage.
4130                          */
4131                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4132                         vtarg->inst_destbasereg = X86_ESP;
4133                         vtarg->inst_offset = inst->stack_usage;
4134                         vtarg->sreg1 = vt_reg;
4135                         mono_bblock_add_inst (cfg->cbb, vtarg);
4136                 }
4137                 else if (cinfo->ret.storage == ArgInIReg) {
4138                         /* The return address is passed in a register */
                             /* (vt_reg is copied to a new vreg which is bound to cinfo->ret.reg) */
4139                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
4140                         vtarg->sreg1 = vt_reg;
4141                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
4142                         mono_bblock_add_inst (cfg->cbb, vtarg);
4143
4144                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
4145                 } else {
                             /* Any other storage: pass vt_reg as an OP_OUTARG of stack type STACK_MP */
                             /* NOTE(review): this declaration shadows the vtarg declared above */
4146                         MonoInst *vtarg;
4147                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4148                         vtarg->type = STACK_MP;
4149                         vtarg->sreg1 = vt_reg;
4150                         mono_bblock_add_inst (cfg->cbb, vtarg);
4151                 }
4152         }
4153 }
4154
4155 #ifdef MONO_ARCH_HAVE_IMT
4156
/*
 * Byte sizes used by mono_arch_build_imt_thunk () to reserve space for each
 * IMT thunk entry. They must be upper bounds for the instructions emitted:
 *   CMP_SIZE       cmp <REG>, imm32        (opcode + modrm, 4 byte immediate)
 *   BR_SMALL_SIZE  jcc rel8                (opcode, 1 byte displacement)
 *   BR_LARGE_SIZE  jcc rel32               (2 byte opcode 0f 8x, 4 byte displacement)
 *   JUMP_IMM_SIZE  jmp *<ADDR>             (opcode + modrm, 4 byte address)
 * BR_LARGE_SIZE used to be 5, one byte less than the jcc rel32 encoding.
 */
4157 // Linear handler, the bsearch head compare is shorter
4158 //[2 + 4] x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
4159 //[1 + 1] x86_branch8(inst,cond,imm,is_signed)
4160 //        x86_patch(ins,target)
4161 //[1 + 5] x86_jump_mem(inst,mem)
4162
4163 #define CMP_SIZE 6
4164 #define BR_SMALL_SIZE 2
4165 #define BR_LARGE_SIZE 6
4166 #define JUMP_IMM_SIZE 6
4167
/*
 * imt_branch_distance:
 *
 *   Return the sum of the chunk_size fields of IMT_ENTRIES [START] up to, but
 * not including, IMT_ENTRIES [TARGET]. The result is 0 when TARGET <= START.
 * mono_arch_build_imt_thunk () uses it to decide whether a branch from entry
 * START to entry TARGET fits in an 8 bit displacement.
 */
4168 static int
4169 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
4170 {
4171         int i, distance = 0;
4172         for (i = start; i < target; ++i)
4173                 distance += imt_entries [i]->chunk_size;
4174         return distance;
4175 }
4176
/*
 * mono_arch_build_imt_thunk:
 *
 *   Build the native thunk which dispatches on the value of
 * MONO_ARCH_IMT_REG among the COUNT entries of IMT_ENTRIES and jumps through
 * the matching slot of VTABLE->vtable. The code is allocated from
 * DOMAIN->code_mp and its start address is returned.
 *   The work is done in three passes: size every entry, emit the code of
 * every entry, then patch the branches between entries.
 *   chunk_size is accumulated with +=, so it is assumed to be 0 on entry
 * (TODO confirm with the caller). A check_target_idx of 0 is treated as
 * "no branch target".
 */
4177 /*
4178  * LOCKING: called with the domain lock held
4179  */
4180 gpointer
4181 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count)
4182 {
4183         int i;
4184         int size = 0;
4185         guint8 *code, *start;
4186
             /* Pass 1: compute the size reserved for each entry and the total */
4187         for (i = 0; i < count; ++i) {
4188                 MonoIMTCheckItem *item = imt_entries [i];
4189                 if (item->is_equals) {
4190                         if (item->check_target_idx) {
                                     /* optional compare, short branch on mismatch, jump through the slot */
4191                                 if (!item->compare_done)
4192                                         item->chunk_size += CMP_SIZE;
4193                                 item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
4194                         } else {
                                     /* no alternative left: unconditional jump through the slot */
4195                                 item->chunk_size += JUMP_IMM_SIZE;
4196                                 /* with assert below:
4197                                  * item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
4198                                  */
4199                         }
4200                 } else {
                             /*
                              * Range check: compare and branch. The target entry can skip its
                              * own compare since this one compares against the same register.
                              */
4201                         item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
4202                         imt_entries [item->check_target_idx]->compare_done = TRUE;
4203                 }
4204                 size += item->chunk_size;
4205         }
4206         code = mono_code_manager_reserve (domain->code_mp, size);
4207         start = code;
             /* Pass 2: emit the code, recording where each entry starts and where its branch is */
4208         for (i = 0; i < count; ++i) {
4209                 MonoIMTCheckItem *item = imt_entries [i];
4210                 item->code_target = code;
4211                 if (item->is_equals) {
4212                         if (item->check_target_idx) {
4213                                 if (!item->compare_done)
4214                                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4215                                 item->jmp_code = code;
                                     /* displacement 0 is a placeholder, patched in pass 3 */
4216                                 x86_branch8 (code, X86_CC_NE, 0, FALSE);
4217                                 x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
4218                         } else {
4219                                 /* enable the commented code to assert on wrong method */
4220                                 /*x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4221                                 item->jmp_code = code;
4222                                 x86_branch8 (code, X86_CC_NE, 0, FALSE);*/
4223                                 x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
4224                                 /*x86_patch (item->jmp_code, code);
4225                                 x86_breakpoint (code);
4226                                 item->jmp_code = NULL;*/
4227                         }
4228                 } else {
4229                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4230                         item->jmp_code = code;
                             /* use the short branch when the estimated distance to the target fits in 8 bits */
4231                         if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
4232                                 x86_branch8 (code, X86_CC_GE, 0, FALSE);
4233                         else
4234                                 x86_branch32 (code, X86_CC_GE, 0, FALSE);
4235                 }
4236         }
4237         /* patch the branches to get to the target items */
4238         for (i = 0; i < count; ++i) {
4239                 MonoIMTCheckItem *item = imt_entries [i];
4240                 if (item->jmp_code) {
4241                         if (item->check_target_idx) {
4242                                 x86_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
4243                         }
4244                 }
4245         }
4246                 
4247         mono_stats.imt_thunks_size += code - start;
             /* the emitted code must not exceed what was reserved */
4248         g_assert (code - start <= size);
4249         return start;
4250 }
4251
/*
 * mono_arch_find_imt_method:
 *
 *   Return the value saved for MONO_ARCH_IMT_REG in REGS, interpreted as a
 * MonoMethod pointer. CODE is not used.
 */
4252 MonoMethod*
4253 mono_arch_find_imt_method (gpointer *regs, guint8 *code)
4254 {
4255         return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
4256 }
4257
/*
 * mono_arch_find_this_argument:
 *
 *   Return the 'this' argument of a call to METHOD, read from the stack whose
 * pointer was saved in REGS [X86_ESP]. The argument is expected 5 pointer
 * sized slots above that address, or 6 when the method returns a struct
 * through the stack (ArgOnStack).
 */
4258 MonoObject*
4259 mono_arch_find_this_argument (gpointer *regs, MonoMethod *method)
4260 {
4261         MonoMethodSignature *sig = mono_method_signature (method);
             /* no mempool is passed, so cinfo is released with g_free () below */
4262         CallInfo *cinfo = get_call_info (NULL, sig, FALSE);
4263         int this_argument_offset;
4264         MonoObject *this_argument;
4265
4266         /* 
4267          * this is the offset of the this arg from esp as saved at the start of 
4268          * mono_arch_create_trampoline_code () in tramp-x86.c.
4269          */
4270         this_argument_offset = 5;
             /* one more slot is skipped when a struct is returned on the stack */
4271         if (MONO_TYPE_ISSTRUCT (sig->ret) && (cinfo->ret.storage == ArgOnStack))
4272                 this_argument_offset++;
4273
4274         this_argument = * (MonoObject**) (((guint8*) regs [X86_ESP]) + this_argument_offset * sizeof (gpointer));
4275
4276         g_free (cinfo);
4277         return this_argument;
4278 }
4279 #endif
4280
/*
 * mono_arch_get_inst_for_method:
 *
 *   Return an instruction implementing a call to CMETHOD inline, or NULL when
 * the method is not handled here. ARGS holds the argument trees, FSIG the
 * signature. The methods recognized are:
 * - mono_defaults.math_class: Sin, Cos, Tan, Atan, Sqrt and Abs (only when the
 *   first parameter is MONO_TYPE_R8)
 * - mono_defaults.thread_class: MemoryBarrier
 * - System.Threading.Interlocked in corlib: Increment, Decrement, Exchange and
 *   Add, only when the type of the first parameter is MONO_TYPE_I4
 */
4281 MonoInst*
4282 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4283 {
4284         MonoInst *ins = NULL;
4285
4286         if (cmethod->klass == mono_defaults.math_class) {
4287                 if (strcmp (cmethod->name, "Sin") == 0) {
4288                         MONO_INST_NEW (cfg, ins, OP_SIN);
4289                         ins->inst_i0 = args [0];
4290                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4291                         MONO_INST_NEW (cfg, ins, OP_COS);
4292                         ins->inst_i0 = args [0];
4293                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4294                         MONO_INST_NEW (cfg, ins, OP_TAN);
4295                         ins->inst_i0 = args [0];
4296                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4297                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4298                         ins->inst_i0 = args [0];
4299                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4300                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4301                         ins->inst_i0 = args [0];
4302                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4303                         MONO_INST_NEW (cfg, ins, OP_ABS);
4304                         ins->inst_i0 = args [0];
4305                 }
4306 #if 0
4307                 /* OP_FREM is not IEEE compatible */
4308                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4309                         MONO_INST_NEW (cfg, ins, OP_FREM);
4310                         ins->inst_i0 = args [0];
4311                         ins->inst_i1 = args [1];
4312                 }
4313 #endif
4314         } else if (cmethod->klass == mono_defaults.thread_class &&
4315                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
4316                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
4317         } else if(cmethod->klass->image == mono_defaults.corlib &&
4318                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4319                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4320
4321                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
                             /* Increment is an atomic add of the constant 1 */
4322                         MonoInst *ins_iconst;
4323
4324                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4325                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4326                         ins_iconst->inst_c0 = 1;
4327
4328                         ins->inst_i0 = args [0];
4329                         ins->inst_i1 = ins_iconst;
4330                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
                             /* Decrement is an atomic add of the constant -1 */
4331                         MonoInst *ins_iconst;
4332
4333                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4334                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4335                         ins_iconst->inst_c0 = -1;
4336
4337                         ins->inst_i0 = args [0];
4338                         ins->inst_i1 = ins_iconst;
4339                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4340                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4341
4342                         ins->inst_i0 = args [0];
4343                         ins->inst_i1 = args [1];
4344                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4345                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4346
4347                         ins->inst_i0 = args [0];
4348                         ins->inst_i1 = args [1];
4349                 }
4350         }
4351
             /* still NULL when no case above matched */
4352         return ins;
4353 }
4354
4355
/*
 * mono_arch_print_tree:
 *
 *   Always returns 0 (FALSE); TREE and ARITY are not used and nothing is
 * printed by this backend.
 */
4356 gboolean
4357 mono_arch_print_tree (MonoInst *tree, int arity)
4358 {
4359         return 0;
4360 }
4361
/*
 * mono_arch_get_domain_intrinsic:
 *
 *   Return a new OP_TLS_GET instruction whose inst_offset is
 * appdomain_tls_offset, or NULL when that offset is not available (-1).
 */
4362 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4363 {
4364         MonoInst* ins;
4365         
4366         if (appdomain_tls_offset == -1)
4367                 return NULL;
4368
4369         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4370         ins->inst_offset = appdomain_tls_offset;
4371         return ins;
4372 }
4373
/*
 * mono_arch_get_thread_intrinsic:
 *
 *   Return a new OP_TLS_GET instruction whose inst_offset is
 * thread_tls_offset, or NULL when that offset is not available (-1).
 */
4374 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4375 {
4376         MonoInst* ins;
4377
4378         if (thread_tls_offset == -1)
4379                 return NULL;
4380
4381         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4382         ins->inst_offset = thread_tls_offset;
4383         return ins;
4384 }
4385
/*
 * mono_arch_get_patch_offset:
 *
 *   Look at the first bytes of the instruction sequence at CODE and return the
 * number of bytes between CODE and the operand to patch (presumably the 32 bit
 * immediate/displacement of the instruction - confirm against the callers).
 * Asserts when the sequence is not one of the known forms.
 */
4386 guint32
4387 mono_arch_get_patch_offset (guint8 *code)
4388 {
4389         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
                     /* mov <OFFSET>(<REG>), <REG> */
4390                 return 2;
4391         else if ((code [0] == 0xba))
                     /* mov <IMM>, %edx */
4392                 return 1;
4393         else if ((code [0] == 0x68))
4394                 /* push IMM */
4395                 return 1;
4396         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
4397                 /* push <OFFSET>(<REG>) */
4398                 return 2;
4399         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
4400                 /* call *<OFFSET>(<REG>) */
4401                 return 2;
4402         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
4403                 /* fldl <ADDR> */
4404                 return 2;
4405         else if ((code [0] == 0x58) && (code [1] == 0x05))
4406                 /* pop %eax; add <OFFSET>, %eax */
4407                 return 2;
             /*
              * The pop <REG> opcodes are 0x58 + reg for reg in [0, X86_NREG), so the
              * upper bound is exclusive; 0x58 + X86_NREG itself is not a pop.
              */
4408         else if ((code [0] >= 0x58) && (code [0] < 0x58 + X86_NREG) && (code [1] == 0x81))
4409                 /* pop <REG>; add <OFFSET>, <REG> */
4410                 return 3;
4411         else {
4412                 g_assert_not_reached ();
4413                 return -1;
4414         }
4415 }
4416
/*
 * mono_arch_get_vcall_slot_addr:
 *
 *   Decode the indirect call instruction which ends right before CODE and
 * return the address of the memory slot the call went through, computed as
 * REGS [base register of the call] + displacement. Returns NULL when the bytes
 * look like a direct call (opcode 0xe8) or match none of the known forms.
 *   After the 'code -= 6' below, the recognized calls start at code [0]
 * (ff /2 with a 32 bit displacement), code [3] (ff /2 with an 8 bit
 * displacement) or code [4] (ff /2 without displacement).
 */
4417 gpointer*
4418 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
4419 {
4420         guint8 reg = 0;
4421         gint32 disp = 0;
4422
4423         /* go to the start of the call instruction
4424          *
4425          * address_byte = (m << 6) | (o << 3) | reg
4426          * call opcode: 0xff address_byte displacement
4427          * 0xff m=1,o=2 imm8
4428          * 0xff m=2,o=2 imm32
4429          */
4430         code -= 6;
4431
4432         /* 
4433          * A given byte sequence can match more than case here, so we have to be
4434          * really careful about the ordering of the cases. Longer sequences
4435          * come first.
4436          */
4437         if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
4438                 /*
4439                  * This is an interface call
4440                  * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
4441                  * ff 10                   call   *(%eax)
4442                  */
4443                 reg = x86_modrm_rm (code [5]);
4444                 disp = 0;
4445 #ifdef MONO_ARCH_HAVE_IMT
4446         } else if ((code [-2] == 0xba) && (code [3] == 0xff) && (x86_modrm_mod (code [4]) == 1) && (x86_modrm_reg (code [4]) == 2) && ((signed char)code [5] < 0)) {
4447                 /* IMT-based interface calls: with MONO_ARCH_IMT_REG == edx
4448                  * ba 14 f8 28 08          mov    $0x828f814,%edx
4449                  * ff 50 fc                call   *0xfffffffc(%eax)
4450                  */
4451                 reg = code [4] & 0x07;
4452                 disp = (signed char)code [5];
4453 #endif
4454         } else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
                     /*
                      * call *<disp8>(<REG>), 3 bytes long. A 5 byte direct call (0xe8)
                      * would start at code [1] and is excluded first.
                      */
4455                 reg = code [4] & 0x07;
4456                 disp = (signed char)code [5];
4457         } else {
4458                 if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
                             /* call *<disp32>(<REG>), 6 bytes long */
4459                         reg = code [1] & 0x07;
4460                         disp = *((gint32*)(code + 2));
4461                 } else if ((code [1] == 0xe8)) {
                             /* direct call: there is no slot */
4462                         return NULL;
4463                 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
4464                         /*
4465                          * This is a interface call
4466                          * 8b 40 30   mov    0x30(%eax),%eax
4467                          * ff 10      call   *(%eax)
4468                          */
4469                         disp = 0;
4470                         reg = code [5] & 0x07;
4471                 }
4472                 else
4473                         return NULL;
4474         }
4475
             /* slot address = saved value of the base register + displacement */
4476         return (gpointer*)(((gint32)(regs [reg])) + disp);
4477 }
4478
/*
 * mono_arch_get_this_arg_from_call:
 *
 *   Return the 'this' argument of a call with signature SIG, read from the
 * stack whose pointer was saved in REGS [X86_ESP]. The slot index is 5 (the 4
 * pointers pushed by the trampoline code plus the return address, see the
 * layout below) plus the stack offset of the first argument in pointer sized
 * units. CODE is not used.
 */
4479 gpointer
4480 mono_arch_get_this_arg_from_call (MonoMethodSignature *sig, gssize *regs, guint8 *code)
4481 {
4482         guint32 esp = regs [X86_ESP];
4483         CallInfo *cinfo;
4484         gpointer res;
4485
             /* no mempool is passed, so cinfo is released with g_free () below */
4486         cinfo = get_call_info (NULL, sig, FALSE);
4487
4488         /*
4489          * The stack looks like:
4490          * <other args>
4491          * <this=delegate>
4492          * <possible vtype return address>
4493          * <return addr>
4494          * <4 pointers pushed by mono_arch_create_trampoline_code ()>
4495          */
4496         res = (((MonoObject**)esp) [5 + (cinfo->args [0].offset / 4)]);
4497         g_free (cinfo);
4498         return res;
4499 }
4500
/*
 * mono_arch_get_delegate_invoke_impl:
 *
 *   Return a small piece of native code, allocated from the code manager of
 * the current domain, which implements the invocation of a delegate with
 * signature SIG, or NULL when no such code is generated:
 * - signatures returning a struct are not supported
 * - with HAS_TARGET, the code replaces the delegate on the stack with its
 *   target and jumps to delegate->method_ptr
 * - without a target, code is only generated for signatures with no
 *   parameters: it loads the delegate and jumps to delegate->method_ptr
 */
4501 gpointer
4502 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
4503 {
4504         guint8 *code, *start;
4505         MonoDomain *domain = mono_domain_get ();
4506
4507         /* FIXME: Support more cases */
4508         if (MONO_TYPE_ISSTRUCT (sig->ret))
4509                 return NULL;
4510
4511         /*
4512          * The stack contains:
4513          * <delegate>
4514          * <return addr>
4515          */
4516
4517         if (has_target) {
                     /* the code manager is only used with the domain lock held */
4518                 mono_domain_lock (domain);
4519                 start = code = mono_code_manager_reserve (domain->code_mp, 64);
4520                 mono_domain_unlock (domain);
4521
4522                 /* Replace the this argument with the target */
                     /* EAX = delegate (at 4(%esp)), ECX = delegate->target, stored back to 4(%esp) */
4523                 x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
4524                 x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
4525                 x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
4526                 x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
4527
4528                 g_assert ((code - start) < 64);
4529         } else {
4530                 if (sig->param_count == 0) {
4531                         mono_domain_lock (domain);
4532                         start = code = mono_code_manager_reserve (domain->code_mp, 32 + (sig->param_count * 8));
4533                         mono_domain_unlock (domain);
4534                 
                             /* EAX = delegate, then tail jump to delegate->method_ptr */
4535                         x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
4536                         x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
4537                 } else {
4538                         /* 
4539                          * The code below does not work in the presence of exceptions, since it 
4540                          * creates a new frame.
                              * It is disabled, so NULL is returned for this case.
4541                          */
4542                         start = NULL;
4543 #if 0
4544                         for (i = 0; i < sig->param_count; ++i)
4545                                 if (!mono_is_regsize_var (sig->params [i]))
4546                                         return NULL;
4547
4548                         mono_domain_lock (domain);
4549                         start = code = mono_code_manager_reserve (domain->code_mp, 32 + (sig->param_count * 8));
4550                         mono_domain_unlock (domain);
4551
4552                         /* Load this == delegate */
4553                         x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
4554
4555                         /* Push arguments in opposite order, taking changes in ESP into account */
4556                         for (i = 0; i < sig->param_count; ++i)
4557                                 x86_push_membase (code, X86_ESP, 4 + (sig->param_count * 4));
4558
4559                         /* Call the delegate */
4560                         x86_call_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
4561                         if (sig->param_count > 0)
4562                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, sig->param_count * 4);
4563                         x86_ret (code);
4564 #endif
4565                 }
4566         }
4567
4568         return start;
4569 }