2007-07-11 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #ifdef HAVE_UNISTD_H
15 #include <unistd.h>
16 #endif
17
18 #include <mono/metadata/appdomain.h>
19 #include <mono/metadata/debug-helpers.h>
20 #include <mono/metadata/threads.h>
21 #include <mono/metadata/profiler-private.h>
22 #include <mono/utils/mono-math.h>
23
24 #include "trace.h"
25 #include "mini-x86.h"
26 #include "inssel.h"
27 #include "cpu-x86.h"
28
29 /* On windows, these hold the key returned by TlsAlloc () */
30 static gint lmf_tls_offset = -1;
31 static gint lmf_addr_tls_offset = -1;
32 static gint appdomain_tls_offset = -1;
33 static gint thread_tls_offset = -1;
34
35 #ifdef MONO_XEN_OPT
36 static gboolean optimize_for_xen = TRUE;
37 #else
38 #define optimize_for_xen 0
39 #endif
40
41 #ifdef PLATFORM_WIN32
42 static gboolean is_win32 = TRUE;
43 #else
44 static gboolean is_win32 = FALSE;
45 #endif
46
47 #define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
48
49 #define ARGS_OFFSET 8
50
51 #ifdef PLATFORM_WIN32
52 /* Under windows, the default pinvoke calling convention is stdcall */
53 #define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
54 #else
55 #define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
56 #endif
57
58 #define NOT_IMPLEMENTED g_assert_not_reached ()
59
60 const char*
61 mono_arch_regname (int reg) {
62         switch (reg) {
63         case X86_EAX: return "%eax";
64         case X86_EBX: return "%ebx";
65         case X86_ECX: return "%ecx";
66         case X86_EDX: return "%edx";
67         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
68         case X86_EDI: return "%edi";
69         case X86_ESI: return "%esi";
70         }
71         return "unknown";
72 }
73
const char*
mono_arch_fregname (int reg) {
	/* x86 floating point values live on the fp register stack; there is
	 * no stable per-register name to report, so always answer "unknown". */
	return "unknown";
}
78
/* Where a call argument or return value lives, as decided by get_call_info (). */
typedef enum {
	ArgInIReg,		/* in an integer register */
	ArgInFloatSSEReg,	/* in an SSE register, single precision (unused: FLOAT_PARAM_REGS is 0) */
	ArgInDoubleSSEReg,	/* in an SSE register, double precision (unused: FLOAT_PARAM_REGS is 0) */
	ArgOnStack,		/* at ArgInfo.offset from the stack pointer */
	ArgValuetypeInReg,	/* small struct split over ArgInfo.pair_storage/pair_regs */
	ArgOnFloatFpStack,	/* on the x87 fp stack, single precision */
	ArgOnDoubleFpStack,	/* on the x87 fp stack, double precision */
	ArgNone			/* no value (void return) */
} ArgStorage;
89
/* Placement of a single argument or return value. */
typedef struct {
	gint16 offset;		/* stack offset, valid when storage == ArgOnStack */
	gint8  reg;		/* register number, valid for the in-register storages */
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];	/* placement of the low/high halves of a small struct */
	gint8 pair_regs [2];		/* registers for the halves stored ArgInIReg */
} ArgInfo;
99
/*
 * Full description of a call's argument layout, produced by get_call_info ().
 * Allocated with trailing space so args [] can hold hasthis + param_count entries.
 */
typedef struct {
	int nargs;
	guint32 stack_usage;		/* total stack space consumed by the arguments */
	guint32 reg_usage;		/* number of integer param registers used */
	guint32 freg_usage;		/* number of float param registers used */
	gboolean need_stack_align;	/* TRUE when padding is needed (Apple 16-byte alignment) */
	guint32 stack_align_amount;	/* padding bytes added when need_stack_align */
	ArgInfo ret;			/* placement of the return value */
	ArgInfo sig_cookie;		/* placement of the vararg signature cookie */
	ArgInfo args [1];		/* flexible: this arg (if any) followed by the parameters */
} CallInfo;
111
112 #define PARAM_REGS 0
113
114 #define FLOAT_PARAM_REGS 0
115
116 static X86_Reg_No param_regs [] = { 0 };
117
118 #if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
119 #define SMALL_STRUCTS_IN_REGS
120 static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
121 #endif
122
123 static void inline
124 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
125 {
126     ainfo->offset = *stack_size;
127
128     if (*gr >= PARAM_REGS) {
129                 ainfo->storage = ArgOnStack;
130                 (*stack_size) += sizeof (gpointer);
131     }
132     else {
133                 ainfo->storage = ArgInIReg;
134                 ainfo->reg = param_regs [*gr];
135                 (*gr) ++;
136     }
137 }
138
139 static void inline
140 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
141 {
142         ainfo->offset = *stack_size;
143
144         g_assert (PARAM_REGS == 0);
145         
146         ainfo->storage = ArgOnStack;
147         (*stack_size) += sizeof (gpointer) * 2;
148 }
149
150 static void inline
151 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
152 {
153     ainfo->offset = *stack_size;
154
155     if (*gr >= FLOAT_PARAM_REGS) {
156                 ainfo->storage = ArgOnStack;
157                 (*stack_size) += is_double ? 8 : 4;
158     }
159     else {
160                 /* A double register */
161                 if (is_double)
162                         ainfo->storage = ArgInDoubleSSEReg;
163                 else
164                         ainfo->storage = ArgInFloatSSEReg;
165                 ainfo->reg = *gr;
166                 (*gr) += 1;
167     }
168 }
169
170
/*
 * add_valuetype:
 *
 *   Decide how the valuetype TYPE is passed (or returned, when IS_RETURN).
 * Small pinvoke return structs may be placed in registers / on the fp stack
 * (SMALL_STRUCTS_IN_REGS platforms); everything else is passed by value on
 * the stack, rounded up to a multiple of the pointer size.
 * GR and FR are unused here; STACK_SIZE is advanced for the stack case.
 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	/* Managed and native layouts can differ, so use the matching size. */
	if (sig->pinvoke) 
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else 
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef SMALL_STRUCTS_IN_REGS
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}		
		/* Structs of size 1, 2, 4 or 8 come back in EAX (and EDX for the high half). */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	/* Default: passed by value on the stack, pointer-size aligned. */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
226
227 /*
228  * get_call_info:
229  *
230  *  Obtain information about a call according to the calling convention.
231  * For x86 ELF, see the "System V Application Binary Interface Intel386 
232  * Architecture Processor Supplment, Fourth Edition" document for more
233  * information.
234  * For x86 win32, see ???.
235  */
/*
 * get_call_info:
 * @mp: mempool to allocate from, or NULL for g_malloc0 (caller then g_frees)
 * @sig: the signature to lay out
 * @is_pinvoke: unused here; pinvoke-ness is read from @sig
 *
 *   Build a CallInfo describing where the return value, the implicit this,
 * the vararg signature cookie and every parameter of @sig live.  On x86 all
 * parameters go on the stack (PARAM_REGS == 0); offsets are assigned in
 * left-to-right order.
 */
static CallInfo*
get_call_info (MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	/* CallInfo already contains one ArgInfo, so this over-allocates by one entry. */
	if (mp)
		cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
	else
		cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		/* Resolve enums etc. to their underlying representation first. */
		ret_type = mono_type_get_underlying_type (sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			/* 64-bit results come back in EAX:EDX; only EAX is recorded here. */
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
				/* Generic reference types are returned like any object. */
				cinfo->ret.storage = ArgInIReg;
				cinfo->ret.reg = X86_EAX;
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);
			;
			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	/* Vararg call with no fixed parameters: emit the cookie immediately. */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We allways pass the sig cookie on the stack for simplicity */
			/* 
			 * Prevent implicit arguments + the sig cookie from being passed 
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		/* Byref parameters are plain pointers regardless of pointee type. */
		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mono_type_get_underlying_type (sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
			add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	/* Sentinel sits after the last fixed parameter: cookie goes at the end. */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

#if defined(__APPLE__)
	/* OSX requires 16-byte stack alignment at call sites. */
	if ((stack_size % 16) != 0) { 
		cinfo->need_stack_align = TRUE;
		stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
	}
#endif

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
426
427 /*
428  * mono_arch_get_argument_info:
429  * @csig:  a method signature
430  * @param_count: the number of parameters to consider
431  * @arg_info: an array to store the result infos
432  *
433  * Gathers information on parameters such as size, alignment and
434  * padding. arg_info should be large enought to hold param_count + 1 entries. 
435  *
436  * Returns the size of the activation frame.
437  */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, pad;
	guint32 align;
	int offset = 8;		/* skip saved EBP + return address */
	CallInfo *cinfo;

	cinfo = get_call_info (NULL, csig, FALSE);

	/* A struct returned on the stack adds a hidden return-address argument. */
	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	/* entry 0 describes the implicit arguments; its size is their total. */
	arg_info [0].size = frame_size;

	for (k = 0; k < param_count; k++) {
		
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else {
			int ialign;
			size = mono_type_stack_size (csig->params [k], &ialign);
			align = ialign;
		}

		/* ignore alignment for now */
		align = 1;

		/* pad entry k so the next argument starts aligned (no-op while align == 1) */
		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	/* round the whole frame up to the architecture frame alignment;
	 * the trailing pad is recorded on the last entry (k == param_count) */
	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	/* cinfo came from g_malloc0 (mp == NULL above), so free it here */
	g_free (cinfo);

	return frame_size;
}
494
/*
 * Pre-assembled machine code for a cpuid helper with the C signature
 *   void f (int id, int *p_eax, int *p_ebx, int *p_ecx, int *p_edx);
 * It is kept as data and copied into executable memory at runtime by
 * cpuid () below, to work around DEP and gcc PIC/%ebx restrictions.
 */
static const guchar cpuid_impl [] = {
	0x55,				/* push   %ebp */
	0x89, 0xe5,			/* mov    %esp,%ebp */
	0x53,				/* push   %ebx */
	0x8b, 0x45, 0x08,		/* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,			/* cpuid   */
	0x50,				/* push   %eax */
	0x8b, 0x45, 0x10,		/* mov    0x10(%ebp),%eax */
	0x89, 0x18,			/* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,		/* mov    0x14(%ebp),%eax */
	0x89, 0x08,			/* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,		/* mov    0x18(%ebp),%eax */
	0x89, 0x10,			/* mov    %edx,(%eax) */
	0x58,				/* pop    %eax */
	0x8b, 0x55, 0x0c,		/* mov    0xc(%ebp),%edx */
	0x89, 0x02,			/* mov    %eax,(%edx) */
	0x5b,				/* pop    %ebx */
	0xc9,				/* leave   */
	0xc3,				/* ret     */
};
515
/* Signature of the executable copy of cpuid_impl. */
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);

/*
 * cpuid:
 *
 *   Run the CPUID instruction for function ID, storing the four result
 * registers through the out parameters.  Returns 1 on success, 0 when the
 * CPU has no CPUID support (detected by toggling EFLAGS bit 21).
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	/* If bit 21 of EFLAGS can be flipped, CPUID is available. */
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	/* Same EFLAGS bit-21 probe, MSVC inline-assembly syntax. */
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		static CpuidFunc func = NULL;
		void *ptr;
		if (!func) {
			ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
			memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
			func = (CpuidFunc)ptr;
		}
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
576
577 /*
578  * Initialize the cpu to execute managed code.
579  */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	/* Read the x87 control word, force 53-bit (double) precision, write it back. */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	/* NOTE(review): this trailing fnstcw only re-reads the word; looks like
	 * a leftover debugging read-back — confirm before removing. */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	_control87 (_PC_53, MCW_PC);
#endif
}
596
597 /*
598  * This function returns the optimizations supported on this cpu.
599  */
600 guint32
601 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
602 {
603         int eax, ebx, ecx, edx;
604         guint32 opts = 0;
605         
606         *exclude_mask = 0;
607         /* Feature Flags function, flags returned in EDX. */
608         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
609                 if (edx & (1 << 15)) {
610                         opts |= MONO_OPT_CMOV;
611                         if (edx & 1)
612                                 opts |= MONO_OPT_FCMOV;
613                         else
614                                 *exclude_mask |= MONO_OPT_FCMOV;
615                 } else
616                         *exclude_mask |= MONO_OPT_CMOV;
617                 if (edx & (1 << 26))
618                         opts |= MONO_OPT_SSE2;
619                 else
620                         *exclude_mask |= MONO_OPT_SSE2;
621         }
622         return opts;
623 }
624
625 /*
626  * Determine whenever the trap whose info is in SIGINFO is caused by
627  * integer overflow.
628  */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	/* 0xf7 with modrm mod == 3 and reg field == 7 is "idiv r32". */
	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		/*
		 * A divisor of -1 means the trap came from INT_MIN / -1,
		 * i.e. an integer overflow rather than a division by zero.
		 */
		if (reg == -1)
			return TRUE;
	}
			
	return FALSE;
}
673
674 GList *
675 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
676 {
677         GList *vars = NULL;
678         int i;
679
680         for (i = 0; i < cfg->num_varinfo; i++) {
681                 MonoInst *ins = cfg->varinfo [i];
682                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
683
684                 /* unused vars */
685                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
686                         continue;
687
688                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
689                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
690                         continue;
691
692                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
693                  * 8bit quantities in caller saved registers on x86 */
694                 if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
695                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
696                         g_assert (i == vmv->idx);
697                         vars = g_list_prepend (vars, vmv);
698                 }
699         }
700
701         vars = mono_varlist_sort (cfg, vars, 0);
702
703         return vars;
704 }
705
706 GList *
707 mono_arch_get_global_int_regs (MonoCompile *cfg)
708 {
709         GList *regs = NULL;
710
711         /* we can use 3 registers for global allocation */
712         regs = g_list_prepend (regs, (gpointer)X86_EBX);
713         regs = g_list_prepend (regs, (gpointer)X86_ESI);
714         regs = g_list_prepend (regs, (gpointer)X86_EDI);
715
716         return regs;
717 }
718
719 /*
720  * mono_arch_regalloc_cost:
721  *
722  *  Return the cost, in number of memory references, of the action of 
723  * allocating the variable VMV into a register during global register
724  * allocation.
725  */
726 guint32
727 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
728 {
729         MonoInst *ins = cfg->varinfo [vmv->idx];
730
731         if (cfg->method->save_lmf)
732                 /* The register is already saved */
733                 return (ins->opcode == OP_ARG) ? 1 : 0;
734         else
735                 /* push+pop+possible load if it is an argument */
736                 return (ins->opcode == OP_ARG) ? 3 : 2;
737 }
738  
739 /*
740  * Set var information according to the calling convention. X86 version.
741  * The locals var stuff should most likely be split in another method.
742  */
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 *
 * Lays out the stack frame: LMF / callee-saved register save area, the
 * valuetype return local, the locals, then maps arguments and the return
 * value to their homes.  OFFSET grows downward from EBP (negated on use).
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	/* NOTE(review): header looks unused in this function — confirm. */
	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->mempool, sig, FALSE);

	cfg->frame_reg = MONO_ARCH_BASEREG;
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		/* 4 bytes per callee-saved register actually used by the method */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		/* round offset up to the locals' required alignment */
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	offset += locals_stack_size;


	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		/* hidden return-address argument: the value lives at the caller-provided slot */
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		break;
	case ArgValuetypeInReg:
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	/* arguments live above EBP at their incoming stack positions */
	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->args [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	/* round the frame size up to the architecture alignment */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	cfg->stack_offset = offset;
}
854
855 void
856 mono_arch_create_vars (MonoCompile *cfg)
857 {
858         MonoMethodSignature *sig;
859         CallInfo *cinfo;
860
861         sig = mono_method_signature (cfg->method);
862
863         cinfo = get_call_info (cfg->mempool, sig, FALSE);
864
865         if (cinfo->ret.storage == ArgValuetypeInReg)
866                 cfg->ret_var_is_local = TRUE;
867 }
868
869 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
870  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
871  */
872
873 static void
874 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
875 {
876         MonoInst *arg;
877         MonoMethodSignature *tmp_sig;
878         MonoInst *sig_arg;
879
880         /* FIXME: Add support for signature tokens to AOT */
881         cfg->disable_aot = TRUE;
882         MONO_INST_NEW (cfg, arg, OP_OUTARG);
883
884         /*
885          * mono_ArgIterator_Setup assumes the signature cookie is 
886          * passed first and all the arguments which were before it are
887          * passed on the stack after the signature. So compensate by 
888          * passing a different signature.
889          */
890         tmp_sig = mono_metadata_signature_dup (call->signature);
891         tmp_sig->param_count -= call->signature->sentinelpos;
892         tmp_sig->sentinelpos = 0;
893         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
894
895         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
896         sig_arg->inst_p0 = tmp_sig;
897
898         arg->inst_left = sig_arg;
899         arg->type = STACK_PTR;
900         /* prepend, so they get reversed */
901         arg->next = call->out_args;
902         call->out_args = arg;
903 }
904
905 /* 
906  * take the arguments and generate the arch-specific
907  * instructions to properly call the function in call.
908  * This includes pushing, moving arguments to the right register
909  * etc.
910  */
911 MonoCallInst*
912 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
913         MonoInst *arg, *in;
914         MonoMethodSignature *sig;
915         int i, n;
916         CallInfo *cinfo;
917         int sentinelpos = 0;
918
919         sig = call->signature;
920         n = sig->param_count + sig->hasthis;
921
922         cinfo = get_call_info (cfg->mempool, sig, FALSE);
923
924         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
925                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
926
927         for (i = 0; i < n; ++i) {
928                 ArgInfo *ainfo = cinfo->args + i;
929
930                 /* Emit the signature cookie just before the implicit arguments */
931                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
932                         emit_sig_cookie (cfg, call);
933                 }
934
935                 if (is_virtual && i == 0) {
936                         /* the argument will be attached to the call instrucion */
937                         in = call->args [i];
938                 } else {
939                         MonoType *t;
940
941                         if (i >= sig->hasthis)
942                                 t = sig->params [i - sig->hasthis];
943                         else
944                                 t = &mono_defaults.int_class->byval_arg;
945                         t = mono_type_get_underlying_type (t);
946
947                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
948                         in = call->args [i];
949                         arg->cil_code = in->cil_code;
950                         arg->inst_left = in;
951                         arg->type = in->type;
952                         /* prepend, so they get reversed */
953                         arg->next = call->out_args;
954                         call->out_args = arg;
955
956                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
957                                 guint32 size, align;
958
959                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
960                                         size = sizeof (MonoTypedRef);
961                                         align = sizeof (gpointer);
962                                 }
963                                 else
964                                         if (sig->pinvoke)
965                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
966                                         else {
967                                                 int ialign;
968                                                 size = mono_type_stack_size (&in->klass->byval_arg, &ialign);
969                                                 align = ialign;
970                                         }
971                                 arg->opcode = OP_OUTARG_VT;
972                                 arg->klass = in->klass;
973                                 arg->backend.is_pinvoke = sig->pinvoke;
974                                 arg->inst_imm = size; 
975                         }
976                         else {
977                                 switch (ainfo->storage) {
978                                 case ArgOnStack:
979                                         arg->opcode = OP_OUTARG;
980                                         if (!t->byref) {
981                                                 if (t->type == MONO_TYPE_R4)
982                                                         arg->opcode = OP_OUTARG_R4;
983                                                 else
984                                                         if (t->type == MONO_TYPE_R8)
985                                                                 arg->opcode = OP_OUTARG_R8;
986                                         }
987                                         break;
988                                 default:
989                                         g_assert_not_reached ();
990                                 }
991                         }
992                 }
993         }
994
995         /* Handle the case where there are no implicit arguments */
996         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
997                 emit_sig_cookie (cfg, call);
998         }
999
1000         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
1001                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1002                         MonoInst *zero_inst;
1003                         /*
1004                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
1005                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1006                          * before calling the function. So we add a dummy instruction to represent pushing the 
1007                          * struct return address to the stack. The return address will be saved to this stack slot 
1008                          * by the code emitted in this_vret_args.
1009                          */
1010                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1011                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1012                         zero_inst->inst_p0 = 0;
1013                         arg->inst_left = zero_inst;
1014                         arg->type = STACK_PTR;
1015                         /* prepend, so they get reversed */
1016                         arg->next = call->out_args;
1017                         call->out_args = arg;
1018                 }
1019                 else
1020                         /* if the function returns a struct, the called method already does a ret $0x4 */
1021                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1022                                 cinfo->stack_usage -= 4;
1023         }
1024         
1025         call->stack_usage = cinfo->stack_usage;
1026
1027 #if defined(__APPLE__)
1028         if (cinfo->need_stack_align) {
1029                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1030                 arg->inst_c0 = cinfo->stack_align_amount;
1031                 arg->next = call->out_args;
1032                 call->out_args = arg;
1033         }
1034 #endif 
1035
1036         return call;
1037 }
1038
1039 /*
1040  * Allow tracing to work with this interface (with an optional argument)
1041  */
1042 void*
1043 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1044 {
1045         guchar *code = p;
1046
1047 #if __APPLE__
1048         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1049 #endif
1050
1051         /* if some args are passed in registers, we need to save them here */
1052         x86_push_reg (code, X86_EBP);
1053
1054         if (cfg->compile_aot) {
1055                 x86_push_imm (code, cfg->method);
1056                 x86_mov_reg_imm (code, X86_EAX, func);
1057                 x86_call_reg (code, X86_EAX);
1058         } else {
1059                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1060                 x86_push_imm (code, cfg->method);
1061                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1062                 x86_call_code (code, 0);
1063         }
1064 #if __APPLE__
1065         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 16);
1066 #else
1067         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1068 #endif
1069
1070         return code;
1071 }
1072
/* Save modes used by mono_arch_instrument_epilog to decide which return
 * value registers must be preserved around the call to the trace function. */
enum {
	SAVE_NONE,       /* void return: nothing to save */
	SAVE_STRUCT,     /* valuetype return: pass the hidden return address */
	SAVE_EAX,        /* 32 bit integer/pointer return in EAX */
	SAVE_EAX_EDX,    /* 64 bit integer return in EAX:EDX */
	SAVE_FP          /* floating point return on the x87 stack */
};
1080
1081 void*
1082 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1083 {
1084         guchar *code = p;
1085         int arg_size = 0, save_mode = SAVE_NONE;
1086         MonoMethod *method = cfg->method;
1087         
1088         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1089         case MONO_TYPE_VOID:
1090                 /* special case string .ctor icall */
1091                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1092                         save_mode = SAVE_EAX;
1093                 else
1094                         save_mode = SAVE_NONE;
1095                 break;
1096         case MONO_TYPE_I8:
1097         case MONO_TYPE_U8:
1098                 save_mode = SAVE_EAX_EDX;
1099                 break;
1100         case MONO_TYPE_R4:
1101         case MONO_TYPE_R8:
1102                 save_mode = SAVE_FP;
1103                 break;
1104         case MONO_TYPE_GENERICINST:
1105                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1106                         save_mode = SAVE_EAX;
1107                         break;
1108                 }
1109                 /* Fall through */
1110         case MONO_TYPE_VALUETYPE:
1111                 save_mode = SAVE_STRUCT;
1112                 break;
1113         default:
1114                 save_mode = SAVE_EAX;
1115                 break;
1116         }
1117
1118         switch (save_mode) {
1119         case SAVE_EAX_EDX:
1120                 x86_push_reg (code, X86_EDX);
1121                 x86_push_reg (code, X86_EAX);
1122                 if (enable_arguments) {
1123                         x86_push_reg (code, X86_EDX);
1124                         x86_push_reg (code, X86_EAX);
1125                         arg_size = 8;
1126                 }
1127                 break;
1128         case SAVE_EAX:
1129                 x86_push_reg (code, X86_EAX);
1130                 if (enable_arguments) {
1131                         x86_push_reg (code, X86_EAX);
1132                         arg_size = 4;
1133                 }
1134                 break;
1135         case SAVE_FP:
1136                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1137                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1138                 if (enable_arguments) {
1139                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1140                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1141                         arg_size = 8;
1142                 }
1143                 break;
1144         case SAVE_STRUCT:
1145                 if (enable_arguments) {
1146                         x86_push_membase (code, X86_EBP, 8);
1147                         arg_size = 4;
1148                 }
1149                 break;
1150         case SAVE_NONE:
1151         default:
1152                 break;
1153         }
1154
1155         if (cfg->compile_aot) {
1156                 x86_push_imm (code, method);
1157                 x86_mov_reg_imm (code, X86_EAX, func);
1158                 x86_call_reg (code, X86_EAX);
1159         } else {
1160                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1161                 x86_push_imm (code, method);
1162                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1163                 x86_call_code (code, 0);
1164         }
1165         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1166
1167         switch (save_mode) {
1168         case SAVE_EAX_EDX:
1169                 x86_pop_reg (code, X86_EAX);
1170                 x86_pop_reg (code, X86_EDX);
1171                 break;
1172         case SAVE_EAX:
1173                 x86_pop_reg (code, X86_EAX);
1174                 break;
1175         case SAVE_FP:
1176                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1177                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1178                 break;
1179         case SAVE_NONE:
1180         default:
1181                 break;
1182         }
1183
1184         return code;
1185 }
1186
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch to the target of INS. If the target (a label
 * or a basic block) already has a native offset, branch there directly;
 * otherwise record a patch info and emit a branch to be fixed up later.
 * With MONO_OPT_BRANCH an 8 bit displacement form is used when the
 * estimated distance fits in an imm8.
 * NOTE(review): relies on `code`, `cfg` and `cpos` being in scope at the
 * expansion site.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1211
1212 /*  
1213  *      Emit an exception if condition is fail and
1214  *  if possible do a directly branch to target 
1215  */
1216 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
1217         do {                                                        \
1218                 MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
1219                 if (tins == NULL) {                                                                             \
1220                         mono_add_patch_info (cfg, code - cfg->native_code,   \
1221                                         MONO_PATCH_INFO_EXC, exc_name);  \
1222                         x86_branch32 (code, cond, 0, signed);               \
1223                 } else {        \
1224                         EMIT_COND_BRANCH (tins, cond, signed);  \
1225                 }                       \
1226         } while (0); 
1227
/* Compare the two values on top of the x87 stack (popping both) and move
 * the FPU status word into AX so the condition can be tested with ALU ops. */
#define EMIT_FPCOMPARE(code) do { \
        x86_fcompp (code); \
        x86_fnstsw (code); \
} while (0); 
1232
1233
/*
 * emit_call:
 *
 *   Emit a call instruction whose target is unresolved: a patch info of
 * PATCH_TYPE/DATA is recorded at the current offset and the call is emitted
 * with a 0 displacement, to be fixed up when the code is installed.
 * Returns the updated code pointer.
 */
static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
        mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
        x86_call_code (code, 0);

        return code;
}
1242
/* TRUE if OPCODE does not read the carry flag, i.e. it is safe to place a
 * flag-clobbering instruction (such as XOR) immediately before it. The
 * add/sub-with-carry opcodes are the only ones that consume CF here. */
#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))
1244
1245 /*
1246  * peephole_pass_1:
1247  *
1248  *   Perform peephole opts which should/can be performed before local regalloc
1249  */
1250 static void
1251 peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
1252 {
1253         MonoInst *ins, *last_ins = NULL;
1254         ins = bb->code;
1255
1256         while (ins) {
1257                 switch (ins->opcode) {
1258                 case OP_IADD_IMM:
1259                 case OP_ADD_IMM:
1260                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1261                                 /* 
1262                                  * X86_LEA is like ADD, but doesn't have the
1263                                  * sreg1==dreg restriction.
1264                                  */
1265                                 ins->opcode = OP_X86_LEA_MEMBASE;
1266                                 ins->inst_basereg = ins->sreg1;
1267                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1268                                 ins->opcode = OP_X86_INC_REG;
1269                         break;
1270                 case OP_SUB_IMM:
1271                 case OP_ISUB_IMM:
1272                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1273                                 ins->opcode = OP_X86_LEA_MEMBASE;
1274                                 ins->inst_basereg = ins->sreg1;
1275                                 ins->inst_imm = -ins->inst_imm;
1276                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1277                                 ins->opcode = OP_X86_DEC_REG;
1278                         break;
1279                 case OP_COMPARE_IMM:
1280                 case OP_ICOMPARE_IMM:
1281                         /* OP_COMPARE_IMM (reg, 0) 
1282                          * --> 
1283                          * OP_X86_TEST_NULL (reg) 
1284                          */
1285                         if (!ins->inst_imm)
1286                                 ins->opcode = OP_X86_TEST_NULL;
1287                         break;
1288                 case OP_X86_COMPARE_MEMBASE_IMM:
1289                         /* 
1290                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1291                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1292                          * -->
1293                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1294                          * OP_COMPARE_IMM reg, imm
1295                          *
1296                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1297                          */
1298                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1299                             ins->inst_basereg == last_ins->inst_destbasereg &&
1300                             ins->inst_offset == last_ins->inst_offset) {
1301                                         ins->opcode = OP_COMPARE_IMM;
1302                                         ins->sreg1 = last_ins->sreg1;
1303
1304                                         /* check if we can remove cmp reg,0 with test null */
1305                                         if (!ins->inst_imm)
1306                                                 ins->opcode = OP_X86_TEST_NULL;
1307                                 }
1308
1309                         break;
1310                 case OP_LOAD_MEMBASE:
1311                 case OP_LOADI4_MEMBASE:
1312                         /* 
1313                          * Note: if reg1 = reg2 the load op is removed
1314                          *
1315                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1316                          * OP_LOAD_MEMBASE offset(basereg), reg2
1317                          * -->
1318                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1319                          * OP_MOVE reg1, reg2
1320                          */
1321                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1322                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1323                             ins->inst_basereg == last_ins->inst_destbasereg &&
1324                             ins->inst_offset == last_ins->inst_offset) {
1325                                 if (ins->dreg == last_ins->sreg1) {
1326                                         last_ins->next = ins->next;                             
1327                                         ins = ins->next;                                
1328                                         continue;
1329                                 } else {
1330                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1331                                         ins->opcode = OP_MOVE;
1332                                         ins->sreg1 = last_ins->sreg1;
1333                                 }
1334
1335                         /* 
1336                          * Note: reg1 must be different from the basereg in the second load
1337                          * Note: if reg1 = reg2 is equal then second load is removed
1338                          *
1339                          * OP_LOAD_MEMBASE offset(basereg), reg1
1340                          * OP_LOAD_MEMBASE offset(basereg), reg2
1341                          * -->
1342                          * OP_LOAD_MEMBASE offset(basereg), reg1
1343                          * OP_MOVE reg1, reg2
1344                          */
1345                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1346                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1347                               ins->inst_basereg != last_ins->dreg &&
1348                               ins->inst_basereg == last_ins->inst_basereg &&
1349                               ins->inst_offset == last_ins->inst_offset) {
1350
1351                                 if (ins->dreg == last_ins->dreg) {
1352                                         last_ins->next = ins->next;                             
1353                                         ins = ins->next;                                
1354                                         continue;
1355                                 } else {
1356                                         ins->opcode = OP_MOVE;
1357                                         ins->sreg1 = last_ins->dreg;
1358                                 }
1359
1360                                 //g_assert_not_reached ();
1361
1362 #if 0
1363                         /* 
1364                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1365                          * OP_LOAD_MEMBASE offset(basereg), reg
1366                          * -->
1367                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1368                          * OP_ICONST reg, imm
1369                          */
1370                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1371                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1372                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1373                                    ins->inst_offset == last_ins->inst_offset) {
1374                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1375                                 ins->opcode = OP_ICONST;
1376                                 ins->inst_c0 = last_ins->inst_imm;
1377                                 g_assert_not_reached (); // check this rule
1378 #endif
1379                         }
1380                         break;
1381                 case OP_LOADU1_MEMBASE:
1382                 case OP_LOADI1_MEMBASE:
1383                         /* 
1384                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1385                          * OP_LOAD_MEMBASE offset(basereg), reg2
1386                          * -->
1387                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1388                          * CONV_I2/U2 reg1, reg2
1389                          */
1390                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1391                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1392                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1393                                         ins->inst_offset == last_ins->inst_offset) {
1394                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1395                                 ins->sreg1 = last_ins->sreg1;
1396                         }
1397                         break;
1398                 case OP_LOADU2_MEMBASE:
1399                 case OP_LOADI2_MEMBASE:
1400                         /* 
1401                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1402                          * OP_LOAD_MEMBASE offset(basereg), reg2
1403                          * -->
1404                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1405                          * CONV_I2/U2 reg1, reg2
1406                          */
1407                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1408                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1409                                         ins->inst_offset == last_ins->inst_offset) {
1410                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1411                                 ins->sreg1 = last_ins->sreg1;
1412                         }
1413                         break;
1414                 case CEE_CONV_I4:
1415                 case CEE_CONV_U4:
1416                 case OP_ICONV_TO_I4:
1417                 case OP_MOVE:
1418                         /*
1419                          * Removes:
1420                          *
1421                          * OP_MOVE reg, reg 
1422                          */
1423                         if (ins->dreg == ins->sreg1) {
1424                                 if (last_ins)
1425                                         last_ins->next = ins->next;                             
1426                                 ins = ins->next;
1427                                 continue;
1428                         }
1429                         /* 
1430                          * Removes:
1431                          *
1432                          * OP_MOVE sreg, dreg 
1433                          * OP_MOVE dreg, sreg
1434                          */
1435                         if (last_ins && last_ins->opcode == OP_MOVE &&
1436                             ins->sreg1 == last_ins->dreg &&
1437                             ins->dreg == last_ins->sreg1) {
1438                                 last_ins->next = ins->next;                             
1439                                 ins = ins->next;                                
1440                                 continue;
1441                         }
1442                         break;
1443                         
1444                 case OP_X86_PUSH_MEMBASE:
1445                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1446                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1447                             ins->inst_basereg == last_ins->inst_destbasereg &&
1448                             ins->inst_offset == last_ins->inst_offset) {
1449                                     ins->opcode = OP_X86_PUSH;
1450                                     ins->sreg1 = last_ins->sreg1;
1451                         }
1452                         break;
1453                 }
1454                 last_ins = ins;
1455                 ins = ins->next;
1456         }
1457         bb->last_ins = last_ins;
1458 }
1459
1460 static void
1461 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1462 {
1463         MonoInst *ins, *last_ins = NULL;
1464         ins = bb->code;
1465
1466         while (ins) {
1467
1468                 switch (ins->opcode) {
1469                 case OP_ICONST:
1470                         /* reg = 0 -> XOR (reg, reg) */
1471                         /* XOR sets cflags on x86, so we cant do it always */
1472                         if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
1473                                 MonoInst *ins2;
1474
1475                                 ins->opcode = OP_IXOR;
1476                                 ins->sreg1 = ins->dreg;
1477                                 ins->sreg2 = ins->dreg;
1478
1479                                 /* 
1480                                  * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG 
1481                                  * since it takes 3 bytes instead of 7.
1482                                  */
1483                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
1484                                         if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1485                                                 ins2->opcode = OP_STORE_MEMBASE_REG;
1486                                                 ins2->sreg1 = ins->dreg;
1487                                         }
1488                                         else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1489                                                 ins2->opcode = OP_STOREI4_MEMBASE_REG;
1490                                                 ins2->sreg1 = ins->dreg;
1491                                         }
1492                                         else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
1493                                                 /* Continue iteration */
1494                                         }
1495                                         else
1496                                                 break;
1497                                 }
1498                         }
1499                         break;
1500                 case OP_IADD_IMM:
1501                 case OP_ADD_IMM:
1502                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1503                                 ins->opcode = OP_X86_INC_REG;
1504                         break;
1505                 case OP_ISUB_IMM:
1506                 case OP_SUB_IMM:
1507                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1508                                 ins->opcode = OP_X86_DEC_REG;
1509                         break;
1510                 case OP_X86_COMPARE_MEMBASE_IMM:
1511                         /* 
1512                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1513                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1514                          * -->
1515                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1516                          * OP_COMPARE_IMM reg, imm
1517                          *
1518                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1519                          */
1520                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1521                             ins->inst_basereg == last_ins->inst_destbasereg &&
1522                             ins->inst_offset == last_ins->inst_offset) {
1523                                         ins->opcode = OP_COMPARE_IMM;
1524                                         ins->sreg1 = last_ins->sreg1;
1525
1526                                         /* check if we can remove cmp reg,0 with test null */
1527                                         if (!ins->inst_imm)
1528                                                 ins->opcode = OP_X86_TEST_NULL;
1529                                 }
1530
1531                         break;
1532                 case OP_LOAD_MEMBASE:
1533                 case OP_LOADI4_MEMBASE:
1534                         /* 
1535                          * Note: if reg1 = reg2 the load op is removed
1536                          *
1537                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1538                          * OP_LOAD_MEMBASE offset(basereg), reg2
1539                          * -->
1540                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1541                          * OP_MOVE reg1, reg2
1542                          */
1543                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1544                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1545                             ins->inst_basereg == last_ins->inst_destbasereg &&
1546                             ins->inst_offset == last_ins->inst_offset) {
1547                                 if (ins->dreg == last_ins->sreg1) {
1548                                         last_ins->next = ins->next;                             
1549                                         ins = ins->next;                                
1550                                         continue;
1551                                 } else {
1552                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1553                                         ins->opcode = OP_MOVE;
1554                                         ins->sreg1 = last_ins->sreg1;
1555                                 }
1556
1557                         /* 
1558                          * Note: reg1 must be different from the basereg in the second load
1559                          * Note: if reg1 = reg2 is equal then second load is removed
1560                          *
1561                          * OP_LOAD_MEMBASE offset(basereg), reg1
1562                          * OP_LOAD_MEMBASE offset(basereg), reg2
1563                          * -->
1564                          * OP_LOAD_MEMBASE offset(basereg), reg1
1565                          * OP_MOVE reg1, reg2
1566                          */
1567                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1568                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1569                               ins->inst_basereg != last_ins->dreg &&
1570                               ins->inst_basereg == last_ins->inst_basereg &&
1571                               ins->inst_offset == last_ins->inst_offset) {
1572
1573                                 if (ins->dreg == last_ins->dreg) {
1574                                         last_ins->next = ins->next;                             
1575                                         ins = ins->next;                                
1576                                         continue;
1577                                 } else {
1578                                         ins->opcode = OP_MOVE;
1579                                         ins->sreg1 = last_ins->dreg;
1580                                 }
1581
1582                                 //g_assert_not_reached ();
1583
1584 #if 0
1585                         /* 
1586                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1587                          * OP_LOAD_MEMBASE offset(basereg), reg
1588                          * -->
1589                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1590                          * OP_ICONST reg, imm
1591                          */
1592                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1593                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1594                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1595                                    ins->inst_offset == last_ins->inst_offset) {
1596                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1597                                 ins->opcode = OP_ICONST;
1598                                 ins->inst_c0 = last_ins->inst_imm;
1599                                 g_assert_not_reached (); // check this rule
1600 #endif
1601                         }
1602                         break;
1603                 case OP_LOADU1_MEMBASE:
1604                 case OP_LOADI1_MEMBASE:
1605                         /* 
1606                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1607                          * OP_LOAD_MEMBASE offset(basereg), reg2
1608                          * -->
1609                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1610                          * CONV_I2/U2 reg1, reg2
1611                          */
1612                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1613                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1614                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1615                                         ins->inst_offset == last_ins->inst_offset) {
1616                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1617                                 ins->sreg1 = last_ins->sreg1;
1618                         }
1619                         break;
1620                 case OP_LOADU2_MEMBASE:
1621                 case OP_LOADI2_MEMBASE:
1622                         /* 
1623                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1624                          * OP_LOAD_MEMBASE offset(basereg), reg2
1625                          * -->
1626                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1627                          * CONV_I2/U2 reg1, reg2
1628                          */
1629                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1630                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1631                                         ins->inst_offset == last_ins->inst_offset) {
1632                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1633                                 ins->sreg1 = last_ins->sreg1;
1634                         }
1635                         break;
1636                 case CEE_CONV_I4:
1637                 case CEE_CONV_U4:
1638                 case OP_ICONV_TO_I4:
1639                 case OP_MOVE:
1640                         /*
1641                          * Removes:
1642                          *
1643                          * OP_MOVE reg, reg 
1644                          */
1645                         if (ins->dreg == ins->sreg1) {
1646                                 if (last_ins)
1647                                         last_ins->next = ins->next;                             
1648                                 ins = ins->next;
1649                                 continue;
1650                         }
1651                         /* 
1652                          * Removes:
1653                          *
1654                          * OP_MOVE sreg, dreg 
1655                          * OP_MOVE dreg, sreg
1656                          */
1657                         if (last_ins && last_ins->opcode == OP_MOVE &&
1658                             ins->sreg1 == last_ins->dreg &&
1659                             ins->dreg == last_ins->sreg1) {
1660                                 last_ins->next = ins->next;                             
1661                                 ins = ins->next;                                
1662                                 continue;
1663                         }
1664                         break;
1665                 case OP_X86_PUSH_MEMBASE:
1666                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1667                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1668                             ins->inst_basereg == last_ins->inst_destbasereg &&
1669                             ins->inst_offset == last_ins->inst_offset) {
1670                                     ins->opcode = OP_X86_PUSH;
1671                                     ins->sreg1 = last_ins->sreg1;
1672                         }
1673                         break;
1674                 }
1675                 last_ins = ins;
1676                 ins = ins->next;
1677         }
1678         bb->last_ins = last_ins;
1679 }
1680
/*
 * Maps branch opcodes to X86_CC_... condition codes.
 * NOTE(review): the row layout appears to follow the branch opcode
 * numbering (eq/ge/gt/le/lt, a second eq..lt group, then the
 * overflow/no-overflow/carry/no-carry checks) -- confirm against the
 * opcode definitions before reordering any entry; the table is
 * position-dependent.
 */
static const int 
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};
1687
/* Maps CMP_... constants to X86_CC_... constants */
static const int
cc_table [] = {
	/* first six entries: eq, ne, le, ge, lt, gt (signed per cc_signed_table) */
	X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
	/* last four entries: le, ge, lt, gt (unsigned per cc_signed_table) */
	X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
};
1694
/*
 * Parallels cc_table entry-for-entry: TRUE when the corresponding
 * condition code compares as signed, FALSE for the unsigned variants.
 * Keep the two tables in sync.
 */
static const int
cc_signed_table [] = {
	TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
	FALSE, FALSE, FALSE, FALSE
};
1700
1701 void
1702 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1703 {
1704         if (cfg->opt & MONO_OPT_PEEPHOLE)
1705                 peephole_pass_1 (cfg, bb);
1706
1707         mono_local_regalloc (cfg, bb);
1708 }
1709
/*
 * emit_float_to_int:
 *
 *   Emit native code which converts the value on top of the x87 FP stack
 * into an integer of SIZE bytes placed in DREG, truncating toward zero.
 * SIZE 1/2 results are widened (sign- or zero-extended per IS_SIGNED).
 * Returns a pointer to the end of the emitted code.
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
#define XMM_TEMP_REG 0
	if (cfg->opt & MONO_OPT_SSE2 && size < 8) {
		/* optimize by assigning a local var for this use so we avoid
		 * the stack manipulations */
		/* SSE2 fast path: spill the x87 value to the stack as a double,
		 * reload into an XMM register and use cvttsd2si, which truncates
		 * without having to modify the FPU control word. */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
		x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		if (size == 1)
			x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
		else if (size == 2)
			x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
		return code;
	}
	/* x87 fallback: save the FPU control word, set the rounding-control
	 * bits (bits 10-11) to 11b = truncate by OR-ing in 0xc00, convert
	 * with fistp, then restore the original control word below. */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register 
		 * x86_pop_reg (code, dreg_high);
		 */
		/* NOTE(review): until the high-word pop above is added, ESP
		 * appears to be left 4 bytes low here, so the fldcw/add-ESP
		 * below would operate on the wrong slots -- verify this path
		 * is unreachable or complete the fix. */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	/* Restore the saved control word and release the scratch slot */
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
1755
/*
 * mono_emit_stack_alloc:
 *
 *   Emit code which subtracts tree->sreg1 bytes from ESP (localloc).
 * If MONO_INST_INIT is set, the allocated area is zero-filled as well.
 * On win32 (or when MONO_ARCH_SIGSEGV_ON_ALTSTACK is defined) the
 * allocation proceeds one page at a time, touching each page so the OS
 * can commit stack pages through its guard-page mechanism.
 * Returns a pointer to the end of the emitted code.
 */
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];

		/*
		 * Under Windows:
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently commited.
		 * Stack normally grows sequentially so OS traps access to the
		 * guard page and commits more pages when needed.
		 */
		/* Sizes below one page (no bits above 0xFFF) skip the probe loop */
		x86_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop */
		/* Allocate one page and touch it to trip the guard page */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
		x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

		/* 
		 * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
		 * that follows only initializes the last part of the area.
		 */
		/* Same as the init code below with size==0x1000 */
		if (tree->flags & MONO_INST_INIT) {
			x86_push_reg (code, X86_EAX);
			x86_push_reg (code, X86_ECX);
			x86_push_reg (code, X86_EDI);
			/* rep stosl setup: ECX = dword count for one page, EAX = 0 */
			x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
			x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
			/* EDI = start of the page, past the three registers saved above */
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_stosl (code);
			x86_pop_reg (code, X86_EDI);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_EAX);
		}

		/* Loop back while at least one more full page remains */
		x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		x86_patch (br[3], br[2]);
		/* Allocate the remaining sub-page amount, if any */
		x86_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		/* Small-size path: a single untouched allocation suffices */
		x86_patch (br[0], code);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
		x86_patch (br[1], code);
		x86_patch (br[4], code);
	}
	else
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;
		/* Save the scratch registers used by rep stosl, unless one of
		 * them is the size/destination register itself */
		if (tree->dreg != X86_EAX && sreg != X86_EAX) {
			x86_push_reg (code, X86_EAX);
			offset += 4;
		}
		if (tree->dreg != X86_ECX && sreg != X86_ECX) {
			x86_push_reg (code, X86_ECX);
			offset += 4;
		}
		if (tree->dreg != X86_EDI && sreg != X86_EDI) {
			x86_push_reg (code, X86_EDI);
			offset += 4;
		}
		
		/* ECX = size in dwords, EAX = 0, EDI = start of the area
		 * (skipping whatever registers were pushed above) */
		x86_shift_reg_imm (code, X86_SHR, sreg, 2);
		if (sreg != X86_ECX)
			x86_mov_reg_reg (code, X86_ECX, sreg, 4);
		x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
				
		x86_lea_membase (code, X86_EDI, X86_ESP, offset);
		x86_cld (code);
		x86_prefix (code, X86_REP_PREFIX);
		x86_stosl (code);
		
		if (tree->dreg != X86_EDI && sreg != X86_EDI)
			x86_pop_reg (code, X86_EDI);
		if (tree->dreg != X86_ECX && sreg != X86_ECX)
			x86_pop_reg (code, X86_ECX);
		if (tree->dreg != X86_EAX && sreg != X86_EAX)
			x86_pop_reg (code, X86_EAX);
	}
	return code;
}
1862
1863
/*
 * emit_move_return_value:
 *
 *   Emit code which moves the return value of the call INS from its
 * fixed return location into ins->dreg.  Scalar calls return in EAX;
 * vtype calls whose result comes back in registers (ArgValuetypeInReg)
 * store the register pair through the destination address popped from
 * the stack.  Other call opcodes fall through and need no move here.
 */
static guint8*
emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
{
	CallInfo *cinfo;
	int quad;

	/* Move return value to the target register */
	switch (ins->opcode) {
	case CEE_CALL:
	case OP_CALL_REG:
	case OP_CALL_MEMBASE:
		/* Integer/pointer results come back in EAX */
		if (ins->dreg != X86_EAX)
			x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
		break;
	case OP_VCALL:
	case OP_VCALL_REG:
	case OP_VCALL_MEMBASE:
		cinfo = get_call_info (cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/* Pop the destination address from the stack */
			x86_pop_reg (code, X86_ECX);
			
			/* Store each returned quad (if present) into the destination */
			for (quad = 0; quad < 2; quad ++) {
				switch (cinfo->ret.pair_storage [quad]) {
				case ArgInIReg:
					/* ECX holds the destination address, so it cannot
					 * also carry part of the result */
					g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
					x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
					break;
				case ArgNone:
					break;
				default:
					g_assert_not_reached ();
				}
			}
		}
		/* Fall through */
	default:
		break;
	}

	return code;
}
1905
/*
 * emit_tls_get:
 * @code: buffer to store code to
 * @dreg: hard register where to place the result
 * @tls_offset: offset info
 *
 * emit_tls_get emits in @code the native code that puts in the dreg register
 * the item in the thread local storage identified by tls_offset.
 *
 * Returns: a pointer to the end of the stored code
 */
static guint8*
emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
	/* 
	 * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
	 * Journal and/or a disassembly of the TlsGet () function.
	 */
	/* Only the TLS slots stored directly in the TEB are handled here */
	g_assert (tls_offset < 64);
	/* fs:[0x18] holds the TEB self pointer (NT_TIB.Self) */
	x86_prefix (code, X86_FS_PREFIX);
	x86_mov_reg_mem (code, dreg, 0x18, 4);
	/* Dunno what this does but TlsGetValue () contains it */
	/* NOTE(review): TEB offset 0x34 is presumably LastErrorValue, i.e.
	 * this mirrors TlsGetValue's SetLastError(0) -- confirm. */
	x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
	/* 3600 (0xe10) is the offset of the TlsSlots array within the TEB */
	x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
#else
	if (optimize_for_xen) {
		/* Under Xen, go through the pointer stored at gs:0 instead of
		 * using a gs-relative access with a non-zero displacement.
		 * NOTE(review): presumably the direct form is trapped/slow on
		 * Xen -- confirm the exact restriction. */
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, 0, 4);
		x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
	} else {
		/* Plain gs-relative load of the TLS slot */
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, tls_offset, 4);
	}
#endif
	return code;
}
1943
1944 /*
1945  * emit_load_volatile_arguments:
1946  *
1947  *  Load volatile arguments from the stack to the original input registers.
1948  * Required before a tail call.
1949  */
1950 static guint8*
1951 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
1952 {
1953         MonoMethod *method = cfg->method;
1954         MonoMethodSignature *sig;
1955         MonoInst *inst;
1956         CallInfo *cinfo;
1957         guint32 i;
1958
1959         /* FIXME: Generate intermediate code instead */
1960
1961         sig = mono_method_signature (method);
1962
1963         cinfo = get_call_info (cfg->mempool, sig, FALSE);
1964         
1965         /* This is the opposite of the code in emit_prolog */
1966
1967         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1968                 ArgInfo *ainfo = cinfo->args + i;
1969                 MonoType *arg_type;
1970                 inst = cfg->args [i];
1971
1972                 if (sig->hasthis && (i == 0))
1973                         arg_type = &mono_defaults.object_class->byval_arg;
1974                 else
1975                         arg_type = sig->params [i - sig->hasthis];
1976
1977                 /*
1978                  * On x86, the arguments are either in their original stack locations, or in
1979                  * global regs.
1980                  */
1981                 if (inst->opcode == OP_REGVAR) {
1982                         g_assert (ainfo->storage == ArgOnStack);
1983                         
1984                         x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
1985                 }
1986         }
1987
1988         return code;
1989 }
1990
/*
 * REAL_PRINT_REG:
 *
 *   Debugging helper: emits code which calls printf to print TEXT
 * followed by the register number (%d) and the runtime value (%p) of
 * hard register REG, preserving the caller-saved registers
 * EAX/EDX/ECX around the call.
 * NOTE(review): printf's address is embedded as an immediate, so code
 * emitted with this macro is for JIT debugging only (not relocatable).
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
2005
/* benchmark and set based on cpu */
/* Code alignment (in bytes) applied to loop-head basic blocks */
#define LOOP_ALIGNMENT 8
/* A block gets loop alignment only when it starts a loop body and is
 * actually nested inside a loop */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2009
2010 void
2011 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2012 {
2013         MonoInst *ins;
2014         MonoCallInst *call;
2015         guint offset;
2016         guint8 *code = cfg->native_code + cfg->code_len;
2017         MonoInst *last_ins = NULL;
2018         guint last_offset = 0;
2019         int max_len, cpos;
2020
2021         if (cfg->opt & MONO_OPT_PEEPHOLE)
2022                 peephole_pass (cfg, bb);
2023
2024         if (cfg->opt & MONO_OPT_LOOP) {
2025                 int pad, align = LOOP_ALIGNMENT;
2026                 /* set alignment depending on cpu */
2027                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2028                         pad = align - pad;
2029                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2030                         x86_padding (code, pad);
2031                         cfg->code_len += pad;
2032                         bb->native_offset = cfg->code_len;
2033                 }
2034         }
2035
2036         if (cfg->verbose_level > 2)
2037                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2038
2039         cpos = bb->max_offset;
2040
2041         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2042                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2043                 g_assert (!cfg->compile_aot);
2044                 cpos += 6;
2045
2046                 cov->data [bb->dfn].cil_code = bb->cil_code;
2047                 /* this is not thread save, but good enough */
2048                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2049         }
2050
2051         offset = code - cfg->native_code;
2052
2053         mono_debug_open_block (cfg, bb, offset);
2054
2055         ins = bb->code;
2056         while (ins) {
2057                 offset = code - cfg->native_code;
2058
2059                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2060
2061                 if (offset > (cfg->code_size - max_len - 16)) {
2062                         cfg->code_size *= 2;
2063                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2064                         code = cfg->native_code + offset;
2065                         mono_jit_stats.code_reallocs++;
2066                 }
2067
2068                 mono_debug_record_line_number (cfg, ins, offset);
2069
2070                 switch (ins->opcode) {
2071                 case OP_BIGMUL:
2072                         x86_mul_reg (code, ins->sreg2, TRUE);
2073                         break;
2074                 case OP_BIGMUL_UN:
2075                         x86_mul_reg (code, ins->sreg2, FALSE);
2076                         break;
2077                 case OP_X86_SETEQ_MEMBASE:
2078                 case OP_X86_SETNE_MEMBASE:
2079                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2080                                          ins->inst_basereg, ins->inst_offset, TRUE);
2081                         break;
2082                 case OP_STOREI1_MEMBASE_IMM:
2083                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2084                         break;
2085                 case OP_STOREI2_MEMBASE_IMM:
2086                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2087                         break;
2088                 case OP_STORE_MEMBASE_IMM:
2089                 case OP_STOREI4_MEMBASE_IMM:
2090                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2091                         break;
2092                 case OP_STOREI1_MEMBASE_REG:
2093                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2094                         break;
2095                 case OP_STOREI2_MEMBASE_REG:
2096                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2097                         break;
2098                 case OP_STORE_MEMBASE_REG:
2099                 case OP_STOREI4_MEMBASE_REG:
2100                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2101                         break;
2102                 case CEE_LDIND_I:
2103                 case CEE_LDIND_I4:
2104                 case CEE_LDIND_U4:
2105                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2106                         break;
2107                 case OP_LOADU4_MEM:
2108                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2109                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2110                         break;
2111                 case OP_LOAD_MEMBASE:
2112                 case OP_LOADI4_MEMBASE:
2113                 case OP_LOADU4_MEMBASE:
2114                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2115                         break;
2116                 case OP_LOADU1_MEMBASE:
2117                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2118                         break;
2119                 case OP_LOADI1_MEMBASE:
2120                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2121                         break;
2122                 case OP_LOADU2_MEMBASE:
2123                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2124                         break;
2125                 case OP_LOADI2_MEMBASE:
2126                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2127                         break;
2128                 case CEE_CONV_I1:
2129                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2130                         break;
2131                 case CEE_CONV_I2:
2132                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2133                         break;
2134                 case CEE_CONV_U1:
2135                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2136                         break;
2137                 case CEE_CONV_U2:
2138                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2139                         break;
2140                 case OP_COMPARE:
2141                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2142                         break;
2143                 case OP_COMPARE_IMM:
2144                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2145                         break;
2146                 case OP_X86_COMPARE_MEMBASE_REG:
2147                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2148                         break;
2149                 case OP_X86_COMPARE_MEMBASE_IMM:
2150                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2151                         break;
2152                 case OP_X86_COMPARE_MEMBASE8_IMM:
2153                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2154                         break;
2155                 case OP_X86_COMPARE_REG_MEMBASE:
2156                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2157                         break;
2158                 case OP_X86_COMPARE_MEM_IMM:
2159                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2160                         break;
2161                 case OP_X86_TEST_NULL:
2162                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2163                         break;
		case OP_X86_ADD_MEMBASE_IMM:
			/* read-modify-write ALU ops directly on [basereg + offset] */
			x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
			break;
		case OP_X86_ADD_MEMBASE:
			/* add sreg1, [sreg2 + offset] */
			x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
			break;
		case OP_X86_SUB_MEMBASE_IMM:
			x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
			break;
		case OP_X86_SUB_MEMBASE:
			x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
			break;
		case OP_X86_AND_MEMBASE_IMM:
			x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
			break;
		case OP_X86_OR_MEMBASE_IMM:
			x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
			break;
		case OP_X86_XOR_MEMBASE_IMM:
			x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
			break;
		case OP_X86_INC_MEMBASE:
			/* inc/dec have shorter encodings than add/sub with an immediate of 1 */
			x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_X86_INC_REG:
			x86_inc_reg (code, ins->dreg);
			break;
		case OP_X86_DEC_MEMBASE:
			x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_X86_DEC_REG:
			x86_dec_reg (code, ins->dreg);
			break;
		case OP_X86_MUL_MEMBASE:
			/* imul sreg1, [sreg2 + offset] */
			x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
			break;
		case OP_BREAK:
			/* emit a breakpoint instruction */
			x86_breakpoint (code);
			break;
		case OP_ADDCC:
		case CEE_ADD:
			/* two-address form: the result lands in sreg1 (== dreg);
			 * ADDCC additionally leaves the carry flag live for a following ADC/SBB */
			x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
			break;
		case OP_ADC:
			/* add-with-carry: the high-word half of a 64 bit addition */
			x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
			break;
		case OP_ADDCC_IMM:
		case OP_ADD_IMM:
			x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
			break;
		case OP_ADC_IMM:
			x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
			break;
		case OP_SUBCC:
		case CEE_SUB:
			x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
			break;
		case OP_SBB:
			/* subtract-with-borrow: the high-word half of a 64 bit subtraction */
			x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
			break;
		case OP_SUBCC_IMM:
		case OP_SUB_IMM:
			x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
			break;
		case OP_SBB_IMM:
			x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
			break;
		case CEE_AND:
			x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
			break;
		case OP_AND_IMM:
			x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
			break;
		case CEE_DIV:
			/* signed divide: cdq sign-extends EAX into EDX:EAX, idiv leaves
			 * the quotient in EAX and the remainder in EDX */
			x86_cdq (code);
			x86_div_reg (code, ins->sreg2, TRUE);
			break;
		case CEE_DIV_UN:
			/* unsigned divide: zero EDX instead of sign-extending */
			x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
			x86_div_reg (code, ins->sreg2, FALSE);
			break;
		case OP_DIV_IMM:
			/* idiv has no immediate form, so materialize the divisor in sreg2
			 * first (sreg2 is clobbered) */
			x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
			x86_cdq (code);
			x86_div_reg (code, ins->sreg2, TRUE);
			break;
		case CEE_REM:
			/* same instruction sequence as CEE_DIV; the remainder comes out in
			 * EDX (presumably dreg is constrained to EDX by the reg allocator) */
			x86_cdq (code);
			x86_div_reg (code, ins->sreg2, TRUE);
			break;
		case CEE_REM_UN:
			x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
			x86_div_reg (code, ins->sreg2, FALSE);
			break;
		case OP_REM_IMM:
			x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
			x86_cdq (code);
			x86_div_reg (code, ins->sreg2, TRUE);
			break;
		case CEE_OR:
			x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
			break;
		case OP_OR_IMM:
			x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
			break;
		case CEE_XOR:
		case OP_IXOR:
			x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
			break;
		case OP_XOR_IMM:
			x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
			break;
		case CEE_SHL:
			/* variable shift counts must live in CL on x86 */
			g_assert (ins->sreg2 == X86_ECX);
			x86_shift_reg (code, X86_SHL, ins->dreg);
			break;
		case CEE_SHR:
			/* arithmetic (sign-propagating) right shift */
			g_assert (ins->sreg2 == X86_ECX);
			x86_shift_reg (code, X86_SAR, ins->dreg);
			break;
		case OP_SHR_IMM:
			x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
			break;
		case OP_SHR_UN_IMM:
			/* logical (zero-filling) right shift */
			x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
			break;
		case CEE_SHR_UN:
			g_assert (ins->sreg2 == X86_ECX);
			x86_shift_reg (code, X86_SHR, ins->dreg);
			break;
		case OP_SHL_IMM:
			x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
			break;
		case OP_LSHL: {
			/* 64 bit shift left. Register layout (from the shld below): low
			 * word in sreg1, high word in backend.reg3, count in ECX.
			 * Hardware shifts mask the count mod 32, so counts >= 32 need the
			 * explicit fixup emitted after the branch. */
			guint8 *jump_to_end;

			/* handle shifts below 32 bits */
			x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
			x86_shift_reg (code, X86_SHL, ins->sreg1);

			/* bit 5 of the count distinguishes shifts >= 32 */
			x86_test_reg_imm (code, X86_ECX, 32);
			jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);

			/* handle shift over 32 bit: high word takes the shifted low word,
			 * low word becomes zero */
			x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
			x86_clear_reg (code, ins->sreg1);
			
			x86_patch (jump_to_end, code);
			}
			break;
		case OP_LSHR: {
			/* 64 bit arithmetic shift right (same register layout as OP_LSHL) */
			guint8 *jump_to_end;

			/* handle shifts below 32 bits */
			x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
			x86_shift_reg (code, X86_SAR, ins->backend.reg3);

			x86_test_reg_imm (code, X86_ECX, 32);
			jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);

			/* handle shifts over 31 bits: low word takes the shifted high word,
			 * high word is filled with the sign bit */
			x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
			x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
			
			x86_patch (jump_to_end, code);
			}
			break;
		case OP_LSHR_UN: {
			/* 64 bit logical shift right */
			guint8 *jump_to_end;

			/* handle shifts below 32 bits */
			x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
			x86_shift_reg (code, X86_SHR, ins->backend.reg3);

			x86_test_reg_imm (code, X86_ECX, 32);
			jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);

			/* handle shifts over 31 bits: high word becomes zero */
			x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
			x86_clear_reg (code, ins->backend.reg3);
			
			x86_patch (jump_to_end, code);
			}
			break;
		case OP_LSHL_IMM:
			/* constant count: the >= 32 / < 32 choice is made at compile time,
			 * so no runtime branch is needed */
			if (ins->inst_imm >= 32) {
				x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
				x86_clear_reg (code, ins->sreg1);
				x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
			} else {
				x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
				x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
			}
			break;
		case OP_LSHR_IMM:
			if (ins->inst_imm >= 32) {
				x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
				/* replicate the sign bit through the high word */
				x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
				x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
			} else {
				x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
				x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
			}
			break;
		case OP_LSHR_UN_IMM:
			if (ins->inst_imm >= 32) {
				x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
				x86_clear_reg (code, ins->backend.reg3);
				x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
			} else {
				x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
				x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
			}
			break;
		case CEE_NOT:
			x86_not_reg (code, ins->sreg1);
			break;
		case CEE_NEG:
			x86_neg_reg (code, ins->sreg1);
			break;
		case OP_SEXT_I1:
			/* sign-extend the low 8 bits of sreg1 into dreg */
			x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
			break;
		case OP_SEXT_I2:
			/* sign-extend the low 16 bits of sreg1 into dreg */
			x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
			break;
		case CEE_MUL:
			x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_MUL_IMM:
			/* strength-reduce multiplications by small constants to cheaper
			 * LEA/ADD/SHL sequences; fall back to imul otherwise */
			switch (ins->inst_imm) {
			case 2:
				/* MOV r1, r2 */
				/* ADD r1, r1 */
				if (ins->dreg != ins->sreg1)
					x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
				x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
				break;
			case 3:
				/* LEA r1, [r2 + r2*2] */
				x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
				break;
			case 5:
				/* LEA r1, [r2 + r2*4] */
				x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				break;
			case 6:
				/* LEA r1, [r2 + r2*2] */
				/* ADD r1, r1          */
				x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
				x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
				break;
			case 9:
				/* LEA r1, [r2 + r2*8] */
				x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
				break;
			case 10:
				/* LEA r1, [r2 + r2*4] */
				/* ADD r1, r1          */
				x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
				break;
			case 12:
				/* LEA r1, [r2 + r2*2] */
				/* SHL r1, 2           */
				x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
				x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
				break;
			case 25:
				/* LEA r1, [r2 + r2*4] */
				/* LEA r1, [r1 + r1*4] */
				x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
				break;
			case 100:
				/* LEA r1, [r2 + r2*4] */
				/* SHL r1, 2           */
				/* LEA r1, [r1 + r1*4] */
				x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
				x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
				break;
			default:
				/* no special case applies: use the three-operand imul form */
				x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
				break;
			}
			break;
		case CEE_MUL_OVF:
			x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
			/* imul sets OF when the signed result does not fit in 32 bits */
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
			break;
		case CEE_MUL_OVF_UN: {
			/* the mul operation and the exception check should most likely be split */
			/* Unsigned MUL requires one operand in EAX and writes the high
			 * half to EDX, so shuffle the operands into place and
			 * save/restore whichever of EAX/EDX is still live. */
			int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
			/*g_assert (ins->sreg2 == X86_EAX);
			g_assert (ins->dreg == X86_EAX);*/
			if (ins->sreg2 == X86_EAX) {
				non_eax_reg = ins->sreg1;
			} else if (ins->sreg1 == X86_EAX) {
				non_eax_reg = ins->sreg2;
			} else {
				/* no need to save since we're going to store to it anyway */
				if (ins->dreg != X86_EAX) {
					saved_eax = TRUE;
					x86_push_reg (code, X86_EAX);
				}
				x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
				non_eax_reg = ins->sreg2;
			}
			if (ins->dreg == X86_EDX) {
				/* EDX is the destination, so EAX must be preserved instead */
				if (!saved_eax) {
					saved_eax = TRUE;
					x86_push_reg (code, X86_EAX);
				}
			} else if (ins->dreg != X86_EAX) {
				saved_edx = TRUE;
				x86_push_reg (code, X86_EDX);
			}
			/* FALSE selects the unsigned mul; OF is set if the product overflows */
			x86_mul_reg (code, non_eax_reg, FALSE);
			/* save before the check since pop and mov don't change the flags */
			if (ins->dreg != X86_EAX)
				x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
			if (saved_edx)
				x86_pop_reg (code, X86_EDX);
			if (saved_eax)
				x86_pop_reg (code, X86_EAX);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
			break;
		}
		case OP_ICONST:
			x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
			break;
		case OP_AOTCONST:
			/* dead path: the assert fires before any code is emitted —
			 * AOT constants are presumably handled via OP_GOT_ENTRY below */
			g_assert_not_reached ();
			mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
			x86_mov_reg_imm (code, ins->dreg, 0);
			break;
		case OP_LOAD_GOTADDR:
			/* call/pop pair: the standard x86 idiom for reading EIP */
			x86_call_imm (code, 0);
			/* 
			 * The patch needs to point to the pop, since the GOT offset needs 
			 * to be added to that address.
			 */
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
			x86_pop_reg (code, ins->dreg);
			/* 0xf0f0f0f0 is a placeholder rewritten when the patch is applied */
			x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
			break;
		case OP_GOT_ENTRY:
			/* load dreg from a GOT slot; the displacement is patched in later */
			mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
			x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
			break;
		case OP_X86_PUSH_GOT_ENTRY:
			mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
			x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
			break;
		case CEE_CONV_I4:
		case OP_MOVE:
			x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
			break;
		case CEE_CONV_U4:
			/* presumably lowered earlier in the pipeline; must not reach here */
			g_assert_not_reached ();
		case OP_JMP: {
			/*
			 * Note: this 'frame destruction' logic is useful for tail calls, too.
			 * Keep in sync with the code in emit_epilog.
			 */
			int pos = 0;

			/* FIXME: no tracing support... */
			if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
				code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
			/* reset offset to make max_len work */
			offset = code - cfg->native_code;

			g_assert (!cfg->method->save_lmf);

			code = emit_load_volatile_arguments (cfg, code);

			/* compute the (negative) offset of the callee-saved register
			 * area below EBP, 4 bytes per saved register */
			if (cfg->used_int_regs & (1 << X86_EBX))
				pos -= 4;
			if (cfg->used_int_regs & (1 << X86_EDI))
				pos -= 4;
			if (cfg->used_int_regs & (1 << X86_ESI))
				pos -= 4;
			if (pos)
				x86_lea_membase (code, X86_ESP, X86_EBP, pos);
	
			/* restore the callee-saved registers (same order as emit_epilog) */
			if (cfg->used_int_regs & (1 << X86_ESI))
				x86_pop_reg (code, X86_ESI);
			if (cfg->used_int_regs & (1 << X86_EDI))
				x86_pop_reg (code, X86_EDI);
			if (cfg->used_int_regs & (1 << X86_EBX))
				x86_pop_reg (code, X86_EBX);
	
			/* restore ESP/EBP */
			x86_leave (code);
			/* jump to the target method; the displacement is patched in later */
			offset = code - cfg->native_code;
			mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
			x86_jump32 (code, 0);
			break;
		}
		case OP_CHECK_THIS:
			/* ensure ins->sreg1 is not NULL
			 * note that cmp DWORD PTR [eax], eax is one byte shorter than
			 * cmp DWORD PTR [eax], 0
			 */
			x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
			break;
		case OP_ARGLIST: {
			/* store the address of the signature cookie (at EBP + sig_cookie)
			 * into *sreg1, using a scratch register distinct from sreg1 */
			int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
			x86_push_reg (code, hreg);
			x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
			x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
			x86_pop_reg (code, hreg);
			break;
		}
		case OP_FCALL:
		case OP_LCALL:
		case OP_VCALL:
		case OP_VOIDCALL:
		case CEE_CALL:
			/* direct call to a known method or absolute address */
			call = (MonoCallInst*)ins;
			if (ins->flags & MONO_INST_HAS_METHOD)
				code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
			else
				code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
			/* for non-stdcall conventions the caller removes the arguments */
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
				/* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
				 * bytes to pop, we want to use pops. GCC does this (note it won't happen
				 * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
				 * smart enough to do that optimization yet
				 *
				 * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
				 * mcs bootstrap slow down. However, doing 1 pop for 4 bytes gives a small
				 * speedup (most likely from locality benefits). People with other processors
				 * should check on theirs to see what happens.
				 */
				if (call->stack_usage == 4) {
					/* we want to use registers that won't get used soon, so use
					 * ecx, as eax will get allocated first. edx is used by long calls,
					 * so we can't use that.
					 */
					
					x86_pop_reg (code, X86_ECX);
				} else {
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
				}
			}
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_FCALL_REG:
		case OP_LCALL_REG:
		case OP_VCALL_REG:
		case OP_VOIDCALL_REG:
		case OP_CALL_REG:
			/* indirect call through a register */
			call = (MonoCallInst*)ins;
			x86_call_reg (code, ins->sreg1);
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
				/* same pop-vs-add trade-off as in the direct-call case above */
				if (call->stack_usage == 4)
					x86_pop_reg (code, X86_ECX);
				else
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
			}
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_FCALL_MEMBASE:
		case OP_LCALL_MEMBASE:
		case OP_VCALL_MEMBASE:
		case OP_VOIDCALL_MEMBASE:
		case OP_CALL_MEMBASE:
			/* indirect call through [sreg1 + offset] */
			call = (MonoCallInst*)ins;
			x86_call_membase (code, ins->sreg1, ins->inst_offset);
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
				if (call->stack_usage == 4)
					x86_pop_reg (code, X86_ECX);
				else
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
			}
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_OUTARG:
		case OP_X86_PUSH:
			x86_push_reg (code, ins->sreg1);
			break;
		case OP_X86_PUSH_IMM:
			x86_push_imm (code, ins->inst_imm);
			break;
		case OP_X86_PUSH_MEMBASE:
			x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_X86_PUSH_OBJ: 
			/* push a block of inst_imm bytes (presumably a valuetype argument):
			 * reserve the space, then rep movsd the data into it.
			 * EDI/ESI/ECX are saved around the copy because movsd uses them. */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
			x86_push_reg (code, X86_EDI);
			x86_push_reg (code, X86_ESI);
			x86_push_reg (code, X86_ECX);
			/* source: inst_basereg + inst_offset */
			if (ins->inst_offset)
				x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
			else
				x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
			/* destination: the reserved area, above the 3 saved registers */
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			/* count in 4-byte words; inst_imm is assumed to be a multiple of 4 */
			x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_movsd (code);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_ESI);
			x86_pop_reg (code, X86_EDI);
			break;
		case OP_X86_LEA:
			/* dreg = sreg1 + inst_imm + (sreg2 << shift_amount) */
			x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
			break;
		case OP_X86_LEA_MEMBASE:
			x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
			break;
		case OP_X86_XCHG:
			x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_LOCALLOC:
			/* keep alignment: round the requested size up to the localloc
			 * alignment before adjusting the stack */
			x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
			x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
			code = mono_emit_stack_alloc (code, ins);
			/* the result is the new stack pointer */
			x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
			break;
		case CEE_RET:
			x86_ret (code);
			break;
		case OP_THROW: {
			/* pass the exception object on the stack and call into the runtime */
			x86_push_reg (code, ins->sreg1);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
							  (gpointer)"mono_arch_throw_exception");
			break;
		}
		case OP_RETHROW: {
			x86_push_reg (code, ins->sreg1);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
							  (gpointer)"mono_arch_rethrow_exception");
			break;
		}
		case OP_CALL_HANDLER: 
			/* call a handler basic block; the call target is patched in later
			 * via MONO_PATCH_INFO_BB */
			/* Align stack */
#ifdef __APPLE__
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
#endif
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
			x86_call_imm (code, 0);
#ifdef __APPLE__
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
#endif
			break;
		case OP_LABEL:
			/* record the native offset so branches to this label can be resolved */
			ins->inst_c0 = code - cfg->native_code;
			break;
		case OP_BR:
			//g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
			//if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
			//break;
			if (ins->flags & MONO_INST_BRLABEL) {
				/* branch to a label instruction */
				if (ins->inst_i0->inst_c0) {
					/* target already emitted: jump directly to it */
					x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
				} else {
					/* forward branch: record a patch and use the short (8 bit)
					 * form when the estimated displacement fits */
					mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
					if ((cfg->opt & MONO_OPT_BRANCH) &&
					    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
						x86_jump8 (code, 0);
					else 
						x86_jump32 (code, 0);
				}
			} else {
				/* branch to a basic block */
				if (ins->inst_target_bb->native_offset) {
					x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
				} else {
					mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
					if ((cfg->opt & MONO_OPT_BRANCH) &&
					    x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
						x86_jump8 (code, 0);
					else 
						x86_jump32 (code, 0);
				} 
			}
			break;
		case OP_BR_REG:
			/* computed branch through a register */
			x86_jump_reg (code, ins->sreg1);
			break;
2748                 case OP_CEQ:
2749                 case OP_CLT:
2750                 case OP_CLT_UN:
2751                 case OP_CGT:
2752                 case OP_CGT_UN:
2753                 case OP_CNE:
2754                         x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2755                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2756                         break;
2757                 case OP_COND_EXC_EQ:
2758                 case OP_COND_EXC_NE_UN:
2759                 case OP_COND_EXC_LT:
2760                 case OP_COND_EXC_LT_UN:
2761                 case OP_COND_EXC_GT:
2762                 case OP_COND_EXC_GT_UN:
2763                 case OP_COND_EXC_GE:
2764                 case OP_COND_EXC_GE_UN:
2765                 case OP_COND_EXC_LE:
2766                 case OP_COND_EXC_LE_UN:
2767                         EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
2768                         break;
2769                 case OP_COND_EXC_OV:
2770                 case OP_COND_EXC_NO:
2771                 case OP_COND_EXC_C:
2772                 case OP_COND_EXC_NC:
2773                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2774                         break;
2775                 case CEE_BEQ:
2776                 case CEE_BNE_UN:
2777                 case CEE_BLT:
2778                 case CEE_BLT_UN:
2779                 case CEE_BGT:
2780                 case CEE_BGT_UN:
2781                 case CEE_BGE:
2782                 case CEE_BGE_UN:
2783                 case CEE_BLE:
2784                 case CEE_BLE_UN:
2785                         EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2786                         break;
2787
		/* floating point opcodes */
		case OP_R8CONST: {
			double d = *(double *)ins->inst_p0;

			/* use the dedicated FPU constants for +0.0 and 1.0; the signbit
			 * check keeps -0.0 from being collapsed to fldz */
			if ((d == 0.0) && (mono_signbit (d) == 0)) {
				x86_fldz (code);
			} else if (d == 1.0) {
				x86_fld1 (code);
			} else {
				if (cfg->compile_aot) {
					/* AOT code cannot reference runtime memory: push the
					 * 8-byte image onto the stack and load it from there */
					guint32 *val = (guint32*)&d;
					x86_push_imm (code, val [1]);
					x86_push_imm (code, val [0]);
					x86_fld_membase (code, X86_ESP, 0, TRUE);
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
				}
				else {
					/* JIT: load from the constant's address, patched in later */
					mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
					x86_fld (code, NULL, TRUE);
				}
			}
			break;
		}
		case OP_R4CONST: {
			/* same scheme as OP_R8CONST, for a 4-byte float */
			float f = *(float *)ins->inst_p0;

			if ((f == 0.0) && (mono_signbit (f) == 0)) {
				x86_fldz (code);
			} else if (f == 1.0) {
				x86_fld1 (code);
			} else {
				if (cfg->compile_aot) {
					guint32 val = *(guint32*)&f;
					x86_push_imm (code, val);
					x86_fld_membase (code, X86_ESP, 0, FALSE);
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
				}
				else {
					mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
					x86_fld (code, NULL, FALSE);
				}
			}
			break;
		}
		case OP_STORER8_MEMBASE_REG:
			x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
			break;
		case OP_LOADR8_SPILL_MEMBASE:
			/* reload a spilled value and swap it below the current TOS */
			x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
			x86_fxch (code, 1);
			break;
		case OP_LOADR8_MEMBASE:
			x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
			break;
		case OP_STORER4_MEMBASE_REG:
			x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
			break;
		case OP_LOADR4_MEMBASE:
			x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
			break;
		case CEE_CONV_R4: /* FIXME: change precision */
		case CEE_CONV_R8:
			/* int -> float: push the integer and fild it from the stack */
			x86_push_reg (code, ins->sreg1);
			x86_fild_membase (code, X86_ESP, 0, FALSE);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
			break;
		case OP_X86_FP_LOAD_I8:
			x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
			break;
		case OP_X86_FP_LOAD_I4:
			x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
			break;
		case OP_FCONV_TO_I1:
			code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
			break;
		case OP_FCONV_TO_U1:
			code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
			break;
		case OP_FCONV_TO_I2:
			code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
			break;
		case OP_FCONV_TO_U2:
			code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
			break;
		case OP_FCONV_TO_I4:
		case OP_FCONV_TO_I:
			code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
			break;
		case OP_FCONV_TO_I8:
			/* float -> int64: save the FPU control word, switch the rounding
			 * control bits (mask 0xc00) to truncate-toward-zero, store the
			 * 8-byte result, then restore the original control word */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
			x86_fnstcw_membase(code, X86_ESP, 0);
			x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
			x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
			x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
			x86_fldcw_membase (code, X86_ESP, 2);
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
			x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
			/* low word into dreg, high word into backend.reg3 */
			x86_pop_reg (code, ins->dreg);
			x86_pop_reg (code, ins->backend.reg3);
			x86_fldcw_membase (code, X86_ESP, 0);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
			break;
		case OP_LCONV_TO_R_UN: { 
			/* 80-bit extended-precision image of 2^64 (exponent 0x403f,
			 * mantissa 0x8000000000000000), used as a correction constant */
			static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
			guint8 *br;

			/* load 64bit integer to FP stack */
			x86_push_imm (code, 0);
			x86_push_reg (code, ins->sreg2);
			x86_push_reg (code, ins->sreg1);
			x86_fild_membase (code, X86_ESP, 0, TRUE);
			/* store as 80bit FP value */
			x86_fst80_membase (code, X86_ESP, 0);
			
			/* test if lreg is negative */
			x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
			br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
	
			/* top bit was set: fild interpreted the value as negative, so
			 * add the correction constant mn (2^64) to get the unsigned value */
			x86_fld80_mem (code, mn);
			x86_fld80_membase (code, X86_ESP, 0);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			x86_fst80_membase (code, X86_ESP, 0);

			x86_patch (br, code);

			/* reload the (possibly corrected) value and release the
			 * 12-byte scratch area */
			x86_fld80_membase (code, X86_ESP, 0);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);

			break;
		}
		case OP_LCONV_TO_OVF_I: {
			guint8 *br [3], *label [1];
			MonoInst *tins;

			/* 
			 * Valid ints: 0xffffffff:0x80000000 to 00000000:0x7fffffff
			 */
			x86_test_reg_reg (code, ins->sreg1, ins->sreg1);

			/* If the low word top bit is set, see if we are negative */
			br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
			/* We are not negative (no top bit set), check for our top word to be zero */
			x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
			br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
			label [0] = code;

			/* throw exception */
			tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
			if (tins) {
				/* jump straight to the basic block that throws */
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
				if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
					x86_jump8 (code, 0);
				else
					x86_jump32 (code, 0);
			} else {
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
				x86_jump32 (code, 0);
			}
	
	
			x86_patch (br [0], code);
			/* our top bit is set, check that top word is 0xffffffff */
			x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
		
			x86_patch (br [1], code);
			/* nope, emit exception */
			br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
			x86_patch (br [2], label [0]);

			if (ins->dreg != ins->sreg1)
				x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
			break;
		}
		case OP_FADD:
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;
		case OP_FSUB:
			x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
			break;		
		case OP_FMUL:
			x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
			break;		
		case OP_FDIV:
			x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
			break;		
		case OP_FNEG:
			x86_fchs (code);
			break;		
		case OP_SIN:
			x86_fsin (code);
			/* NOTE(review): the fldz/fadd pair after the transcendental ops
			 * presumably forces rounding of the 80-bit result -- confirm intent */
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;		
		case OP_COS:
			x86_fcos (code);
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;		
		case OP_ABS:
			x86_fabs (code);
			break;		
		case OP_TAN: {
			/* 
			 * it really doesn't make sense to inline all this code,
			 * it's here just to show that things may not be as simple 
			 * as they appear.
			 */
			guchar *check_pos, *end_tan, *pop_jump;
			/* EAX is clobbered by the status-word reads below */
			x86_push_reg (code, X86_EAX);
			x86_fptan (code);
			x86_fnstsw (code);
			/* C2 set means the operand was out of range for fptan */
			x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
			check_pos = code;
			x86_branch8 (code, X86_CC_NE, 0, FALSE);
			x86_fstp (code, 0); /* pop the 1.0 */
			end_tan = code;
			x86_jump8 (code, 0);
			/* out of range: reduce the argument with fprem1 against 2*pi
			 * (fldpi + fadd) before retrying fptan */
			x86_fldpi (code);
			x86_fp_op (code, X86_FADD, 0);
			x86_fxch (code, 1);
			x86_fprem1 (code);
			x86_fstsw (code);
			/* C2 still set after fprem1 means the reduction is incomplete;
			 * in that case the fstp/fptan pair below is skipped */
			x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
			pop_jump = code;
			x86_branch8 (code, X86_CC_NE, 0, FALSE);
			x86_fstp (code, 1);
			x86_fptan (code);
			x86_patch (pop_jump, code);
			x86_fstp (code, 0); /* pop the 1.0 */
			x86_patch (check_pos, code);
			x86_patch (end_tan, code);
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			x86_pop_reg (code, X86_EAX);
			break;
		}
		case OP_ATAN:
			/* fpatan computes atan(ST1/ST0); loading 1.0 gives atan(x) */
			x86_fld1 (code);
			x86_fpatan (code);
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;		
		case OP_SQRT:
			x86_fsqrt (code);
			break;		
		case OP_X86_FPOP:
			x86_fstp (code, 0);
			break;		
		case OP_FREM: {
			guint8 *l1, *l2;

			/* EAX is clobbered by fnstsw below */
			x86_push_reg (code, X86_EAX);
			/* we need to exchange ST(0) with ST(1) */
			x86_fxch (code, 1);

			/* this requires a loop, because fprem sometimes 
			 * returns a partial remainder */
			l1 = code;
			/* looks like MS is using fprem instead of the IEEE compatible fprem1 */
			/* x86_fprem1 (code); */
			x86_fprem (code);
			x86_fnstsw (code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
			/* l2 points past the 2-byte branch instruction so l1 - l2 is the
			 * correct backward displacement; loop while C2 (partial) is set */
			l2 = code + 2;
			x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);

			/* pop result */
			x86_fstp (code, 1);

			x86_pop_reg (code, X86_EAX);
			break;
		}
		case OP_FCOMPARE:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* fcomip sets EFLAGS directly; no status-word shuffling needed */
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				break;
			}
			/* this overwrites EAX */
			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			break;
		case OP_FCEQ:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* zeroing the register at the start results in 
				 * shorter and faster code (we can also remove the widening op)
				 */
				guchar *unordered_check;
				x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				/* PF set means unordered (NaN): the result stays 0 */
				unordered_check = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
				x86_patch (unordered_check, code);
				break;
			}
			/* EMIT_FPCOMPARE clobbers EAX, so preserve it unless it is dreg */
			if (ins->dreg != X86_EAX) 
				x86_push_reg (code, X86_EAX);

			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			/* 0x4000 is the C3 bit: set when the operands compared equal */
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

			if (ins->dreg != X86_EAX) 
				x86_pop_reg (code, X86_EAX);
			break;
		case OP_FCLT:
		case OP_FCLT_UN:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* zeroing the register at the start results in 
				 * shorter and faster code (we can also remove the widening op)
				 */
				x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				if (ins->opcode == OP_FCLT_UN) {
					/* the _UN variant must yield true for unordered (NaN) */
					guchar *unordered_check = code;
					guchar *jump_to_end;
					x86_branch8 (code, X86_CC_P, 0, FALSE);
					x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
					jump_to_end = code;
					x86_jump8 (code, 0);
					x86_patch (unordered_check, code);
					x86_inc_reg (code, ins->dreg);
					x86_patch (jump_to_end, code);
				} else {
					x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
				}
				break;
			}
			if (ins->dreg != X86_EAX) 
				x86_push_reg (code, X86_EAX);

			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			if (ins->opcode == OP_FCLT_UN) {
				/* also accept the unordered result (all condition bits set) */
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				x86_patch (is_not_zero_check, code);
				x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);

				x86_patch (end_jump, code);
			}
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

			if (ins->dreg != X86_EAX) 
				x86_pop_reg (code, X86_EAX);
			break;
		case OP_FCGT:
		case OP_FCGT_UN:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* zeroing the register at the start results in 
				 * shorter and faster code (we can also remove the widening op)
				 */
				guchar *unordered_check;
				x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				if (ins->opcode == OP_FCGT) {
					/* the ordered variant must yield false for unordered (NaN) */
					unordered_check = code;
					x86_branch8 (code, X86_CC_P, 0, FALSE);
					x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
					x86_patch (unordered_check, code);
				} else {
					x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
				}
				break;
			}
			if (ins->dreg != X86_EAX) 
				x86_push_reg (code, X86_EAX);

			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
			if (ins->opcode == OP_FCGT_UN) {
				/* also accept the unordered result (all condition bits set) */
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				x86_patch (is_not_zero_check, code);
				x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
	
				x86_patch (end_jump, code);
			}
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

			if (ins->dreg != X86_EAX) 
				x86_pop_reg (code, X86_EAX);
			break;
		case OP_FBEQ:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* skip the branch when the compare was unordered (PF set) */
				guchar *jump = code;
				x86_branch8 (code, X86_CC_P, 0, TRUE);
				EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
				x86_patch (jump, code);
				break;
			}
			/* non-FCMOV path: EAX holds the masked FPU condition bits left by
			 * the preceding OP_FCOMPARE; 0x4000 is the C3 (equal) bit */
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
			break;
		case OP_FBNE_UN:
			/* Branch if C013 != 100 */
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* branch if !ZF or (PF|CF) */
				EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
				break;
			}
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
			EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
			break;
		case OP_FBLT:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* GT rather than LT -- same flag choice as OP_FCLT above,
				 * presumably because of the operand order on the FP stack */
				EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
				break;
			}
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBLT_UN:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* unordered (PF) also takes the branch */
				EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
				break;
			}
			/* note: this test is always true inside this case; it mirrors the
			 * shared ordered/unordered structure of the other FB* cases */
			if (ins->opcode == OP_FBLT_UN) {
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				x86_patch (is_not_zero_check, code);
				x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);

				x86_patch (end_jump, code);
			}
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBGT:
		case OP_FBGT_UN:
			if (cfg->opt & MONO_OPT_FCMOV) {
				if (ins->opcode == OP_FBGT) {
					guchar *br1;

					/* skip branch if C1=1 */
					br1 = code;
					x86_branch8 (code, X86_CC_P, 0, FALSE);
					/* branch if (C0 | C3) = 1 */
					EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
					x86_patch (br1, code);
				} else {
					EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
				}
				break;
			}
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
			if (ins->opcode == OP_FBGT_UN) {
				/* also accept the unordered result (all condition bits set) */
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				x86_patch (is_not_zero_check, code);
				x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);

				x86_patch (end_jump, code);
			}
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBGE:
			/* Branch if C013 == 100 or 001 */
			if (cfg->opt & MONO_OPT_FCMOV) {
				guchar *br1;

				/* skip branch if C1=1 */
				br1 = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				/* branch if (C0 | C3) = 1 */
				EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
				x86_patch (br1, code);
				break;
			}
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBGE_UN:
			/* Branch if C013 == 000 */
			if (cfg->opt & MONO_OPT_FCMOV) {
				EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
				break;
			}
			EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
			break;
		case OP_FBLE:
			/* Branch if C013=000 or 100 */
			if (cfg->opt & MONO_OPT_FCMOV) {
				guchar *br1;

				/* skip branch if C1=1 */
				br1 = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				/* branch if C0=0 */
				EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
				x86_patch (br1, code);
				break;
			}
			x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBLE_UN:
			/* Branch if C013 != 001 */
			if (cfg->opt & MONO_OPT_FCMOV) {
				EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
				break;
			}
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
			EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3319                         break;
3320                 case OP_CKFINITE: {
3321                         x86_push_reg (code, X86_EAX);
3322                         x86_fxam (code);
3323                         x86_fnstsw (code);
3324                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3325                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3326                         x86_pop_reg (code, X86_EAX);
3327                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3328                         break;
3329                 }
3330                 case OP_TLS_GET: {
3331                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3332                         break;
3333                 }
3334                 case OP_MEMORY_BARRIER: {
3335                         /* Not needed on x86 */
3336                         break;
3337                 }
3338                 case OP_ATOMIC_ADD_I4: {
3339                         int dreg = ins->dreg;
3340
3341                         if (dreg == ins->inst_basereg) {
3342                                 x86_push_reg (code, ins->sreg2);
3343                                 dreg = ins->sreg2;
3344                         } 
3345                         
3346                         if (dreg != ins->sreg2)
3347                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3348
3349                         x86_prefix (code, X86_LOCK_PREFIX);
3350                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3351
3352                         if (dreg != ins->dreg) {
3353                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3354                                 x86_pop_reg (code, dreg);
3355                         }
3356
3357                         break;
3358                 }
3359                 case OP_ATOMIC_ADD_NEW_I4: {
3360                         int dreg = ins->dreg;
3361
3362                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3363                         if (ins->sreg2 == dreg) {
3364                                 if (dreg == X86_EBX) {
3365                                         dreg = X86_EDI;
3366                                         if (ins->inst_basereg == X86_EDI)
3367                                                 dreg = X86_ESI;
3368                                 } else {
3369                                         dreg = X86_EBX;
3370                                         if (ins->inst_basereg == X86_EBX)
3371                                                 dreg = X86_EDI;
3372                                 }
3373                         } else if (ins->inst_basereg == dreg) {
3374                                 if (dreg == X86_EBX) {
3375                                         dreg = X86_EDI;
3376                                         if (ins->sreg2 == X86_EDI)
3377                                                 dreg = X86_ESI;
3378                                 } else {
3379                                         dreg = X86_EBX;
3380                                         if (ins->sreg2 == X86_EBX)
3381                                                 dreg = X86_EDI;
3382                                 }
3383                         }
3384
3385                         if (dreg != ins->dreg) {
3386                                 x86_push_reg (code, dreg);
3387                         }
3388
3389                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3390                         x86_prefix (code, X86_LOCK_PREFIX);
3391                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3392                         /* dreg contains the old value, add with sreg2 value */
3393                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3394                         
3395                         if (ins->dreg != dreg) {
3396                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3397                                 x86_pop_reg (code, dreg);
3398                         }
3399
3400                         break;
3401                 }
3402                 case OP_ATOMIC_EXCHANGE_I4: {
3403                         guchar *br[2];
3404                         int sreg2 = ins->sreg2;
3405                         int breg = ins->inst_basereg;
3406
3407                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3408                          * hack to overcome limits in x86 reg allocator 
3409                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3410                          */
3411                         if (ins->dreg != X86_EAX)
3412                                 x86_push_reg (code, X86_EAX);
3413                         
3414                         /* We need the EAX reg for the cmpxchg */
3415                         if (ins->sreg2 == X86_EAX) {
3416                                 x86_push_reg (code, X86_EDX);
3417                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3418                                 sreg2 = X86_EDX;
3419                         }
3420
3421                         if (breg == X86_EAX) {
3422                                 x86_push_reg (code, X86_ESI);
3423                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3424                                 breg = X86_ESI;
3425                         }
3426
3427                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3428
3429                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3430                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3431                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3432                         x86_patch (br [1], br [0]);
3433
3434                         if (breg != ins->inst_basereg)
3435                                 x86_pop_reg (code, X86_ESI);
3436
3437                         if (ins->dreg != X86_EAX) {
3438                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3439                                 x86_pop_reg (code, X86_EAX);
3440                         }
3441
3442                         if (ins->sreg2 != sreg2)
3443                                 x86_pop_reg (code, X86_EDX);
3444
3445                         break;
3446                 }
3447                 default:
3448                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3449                         g_assert_not_reached ();
3450                 }
3451
3452                 if ((code - cfg->native_code - offset) > max_len) {
3453                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3454                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3455                         g_assert_not_reached ();
3456                 }
3457                
3458                 cpos += max_len;
3459
3460                 last_ins = ins;
3461                 last_offset = offset;
3462                 
3463                 ins = ins->next;
3464         }
3465
3466         cfg->code_len = code - cfg->native_code;
3467 }
3468
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Hook for registering architecture-specific low-level runtime calls.
 * The x86 backend has none to register, so this is intentionally a no-op.
 */
void
mono_arch_register_lowlevel_calls (void)
{
}
3473
3474 void
3475 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3476 {
3477         MonoJumpInfo *patch_info;
3478         gboolean compile_aot = !run_cctors;
3479
3480         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3481                 unsigned char *ip = patch_info->ip.i + code;
3482                 const unsigned char *target;
3483
3484                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3485
3486                 if (compile_aot) {
3487                         switch (patch_info->type) {
3488                         case MONO_PATCH_INFO_BB:
3489                         case MONO_PATCH_INFO_LABEL:
3490                                 break;
3491                         default:
3492                                 /* No need to patch these */
3493                                 continue;
3494                         }
3495                 }
3496
3497                 switch (patch_info->type) {
3498                 case MONO_PATCH_INFO_IP:
3499                         *((gconstpointer *)(ip)) = target;
3500                         break;
3501                 case MONO_PATCH_INFO_CLASS_INIT: {
3502                         guint8 *code = ip;
3503                         /* Might already been changed to a nop */
3504                         x86_call_code (code, 0);
3505                         x86_patch (ip, target);
3506                         break;
3507                 }
3508                 case MONO_PATCH_INFO_ABS:
3509                 case MONO_PATCH_INFO_METHOD:
3510                 case MONO_PATCH_INFO_METHOD_JUMP:
3511                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3512                 case MONO_PATCH_INFO_BB:
3513                 case MONO_PATCH_INFO_LABEL:
3514                         x86_patch (ip, target);
3515                         break;
3516                 case MONO_PATCH_INFO_NONE:
3517                         break;
3518                 default: {
3519                         guint32 offset = mono_arch_get_patch_offset (ip);
3520                         *((gconstpointer *)(ip + offset)) = target;
3521                         break;
3522                 }
3523                 }
3524         }
3525 }
3526
/*
 * mono_arch_emit_prolog:
 *
 *   Emit the native prolog for cfg->method: allocate the code buffer,
 * set up the %ebp frame, optionally attach the thread / save an LMF,
 * push the used callee-saved registers, reserve the stack frame and
 * load register-allocated arguments from their stack slots.
 * Returns the code pointer just past the emitted prolog.
 */
guint8 *
mono_arch_emit_prolog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoBasicBlock *bb;
	MonoMethodSignature *sig;
	MonoInst *inst;
	int alloc_size, pos, max_offset, i;
	guint8 *code;

	/* Initial buffer estimate: 4 native bytes per IL byte, 256 minimum. */
	cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);

	if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
		cfg->code_size += 512;

	code = cfg->native_code = g_malloc (cfg->code_size);

	/* Standard frame setup: push %ebp; mov %esp -> %ebp */
	x86_push_reg (code, X86_EBP);
	x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);

	alloc_size = cfg->stack_offset;
	pos = 0;

	if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
		/* Might need to attach the thread to the JIT */
		if (lmf_tls_offset != -1) {
			guint8 *buf;

			/* If the LMF TLS slot is non-NULL the thread is already attached;
			 * otherwise call mono_jit_thread_attach (cfg->domain). */
			code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
			x86_test_reg_reg (code, X86_EAX, X86_EAX);
			buf = code;
			x86_branch8 (code, X86_CC_NE, 0, 0);
			x86_push_imm (code, cfg->domain);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
			x86_patch (buf, code);
#ifdef PLATFORM_WIN32
			/* The TLS key actually contains a pointer to the MonoJitTlsData structure */
			/* FIXME: Add a separate key for LMF to avoid this */
			x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
#endif
		} else {
			/* No fast TLS access: always call the attach helper. */
			g_assert (!cfg->compile_aot);
			x86_push_imm (code, cfg->domain);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
		}
	}

	if (method->save_lmf) {
		/* The LMF is built in-place on the stack by the pushes below. */
		pos += sizeof (MonoLMF);

		/* save the current IP; "code + 1" skips the push opcode byte so the
		 * MONO_PATCH_INFO_IP patch writes over the 32-bit immediate. */
		mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
		x86_push_imm_template (code);

		/* save all caller saved regs */
		x86_push_reg (code, X86_EBP);
		x86_push_reg (code, X86_ESI);
		x86_push_reg (code, X86_EDI);
		x86_push_reg (code, X86_EBX);

		if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
			/*
			 * Optimized version which uses the mono_lmf TLS variable instead of indirection
			 * through the mono_lmf_addr TLS variable.
			 */
			/* %eax = previous_lmf */
			x86_prefix (code, X86_GS_PREFIX);
			x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
			/* skip method_info + lmf */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
			/* push previous_lmf */
			x86_push_reg (code, X86_EAX);
			/* new lmf = ESP */
			x86_prefix (code, X86_GS_PREFIX);
			x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
		} else {
			/* get the address of lmf for the current thread */
			/* 
			 * This is performance critical so we try to use some tricks to make
			 * it fast.
			 */

			if (lmf_addr_tls_offset != -1) {
				/* Load lmf quicky using the GS register */
				code = emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
#ifdef PLATFORM_WIN32
				/* The TLS key actually contains a pointer to the MonoJitTlsData structure */
				/* FIXME: Add a separate key for LMF to avoid this */
				x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
#endif
			} else {
				/* Slow path: call out to get the per-thread LMF address. */
				code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
			}

			/* Skip method info */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);

			/* push lmf */
			x86_push_reg (code, X86_EAX); 
			/* push *lfm (previous_lmf) */
			x86_push_membase (code, X86_EAX, 0);
			/* *(lmf) = ESP */
			x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
		}
	} else {
		/* No LMF: just push the callee-saved registers this method uses. */

		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_push_reg (code, X86_EBX);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_push_reg (code, X86_EDI);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_push_reg (code, X86_ESI);
			pos += 4;
		}
	}

	/* The pushes above already consumed "pos" bytes of the frame. */
	alloc_size -= pos;

#if __APPLE__
	/* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
	{
		int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
		if (tot & 4) {
			tot += 4;
			alloc_size += 4;
		}
		if (tot & 8) {
			alloc_size += 8;
		}
	}
#endif

	if (alloc_size) {
		/* See mono_emit_stack_alloc */
#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
		/* Touch the stack one page at a time so guard pages are hit in
		 * order and the stack can grow (required on Windows / altstack). */
		guint32 remaining_size = alloc_size;
		while (remaining_size >= 0x1000) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
			x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
			remaining_size -= 0x1000;
		}
		if (remaining_size)
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
#else
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
#endif
	}

/* NOTE(review): "__APPLE_" below looks like a typo for "__APPLE__"; as written
 * this debug 16-byte-alignment check is compiled out on every platform.
 * Confirm whether disabling it was intentional before "fixing" it. */
#if __APPLE_
	/* check the stack is aligned */
	x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
	x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
	x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
	x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
	x86_breakpoint (code);
#endif

	/* compute max_offset in order to use short forward jumps */
	max_offset = 0;
	if (cfg->opt & MONO_OPT_BRANCH) {
		for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
			MonoInst *ins = bb->code;
			bb->max_offset = max_offset;

			if (cfg->prof_options & MONO_PROFILE_COVERAGE)
				max_offset += 6;
			/* max alignment for loops */
			if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
				max_offset += LOOP_ALIGNMENT;

			while (ins) {
				if (ins->opcode == OP_LABEL)
					ins->inst_c1 = max_offset;
				
				/* Accumulate the worst-case length of each instruction. */
				max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
				ins = ins->next;
			}
		}
	}

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);

	/* load arguments allocated to register from the stack */
	sig = mono_method_signature (method);
	pos = 0;

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		inst = cfg->args [pos];
		if (inst->opcode == OP_REGVAR) {
			x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
			if (cfg->verbose_level > 2)
				g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
		}
		pos++;
	}

	cfg->code_len = code - cfg->native_code;

	return code;
}
3736
/*
 * mono_arch_emit_epilog:
 *
 *   Emit the native epilog: restore the saved LMF or pop the
 * callee-saved registers, load value-type return values into the
 * registers the ABI expects, tear down the frame and emit the return
 * (popping stack arguments for stdcall / hidden vtype-return pointers).
 */
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig = mono_method_signature (method);
	int quad, pos;
	guint32 stack_to_pop;
	guint8 *code;
	int max_epilog_size = 16;
	CallInfo *cinfo;
	
	if (cfg->method->save_lmf)
		max_epilog_size += 128;

	/* Grow the code buffer until the epilog is guaranteed to fit. */
	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with OP_JMP */
	pos = 0;
	
	if (method->save_lmf) {
		gint32 prev_lmf_reg;
		/* The LMF lives at the bottom of the frame, just below %ebp. */
		gint32 lmf_offset = -sizeof (MonoLMF);

		if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
			/*
			 * Optimized version which uses the mono_lmf TLS variable instead of indirection
			 * through the mono_lmf_addr TLS variable.
			 */
			/* reg = previous_lmf */
			x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

			/* lmf = previous_lmf */
			x86_prefix (code, X86_GS_PREFIX);
			x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
		} else {
			/* Find a spare register: 64-bit values are returned in
			 * EAX:EDX, so EDX is not free then and EDI is used instead. */
			switch (sig->ret->type) {
			case MONO_TYPE_I8:
			case MONO_TYPE_U8:
				prev_lmf_reg = X86_EDI;
				cfg->used_int_regs |= (1 << X86_EDI);
				break;
			default:
				prev_lmf_reg = X86_EDX;
				break;
			}

			/* reg = previous_lmf */
			x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

			/* ecx = lmf */
			x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);

			/* *(lmf) = previous_lmf */
			x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
		}

		/* restore caller saved regs from their slots in the LMF */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
		}

		/* EBP is restored by LEAVE */
	} else {
		/* The registers were pushed right below %ebp by the prolog:
		 * compute their (negative) offset, point %esp there, and pop
		 * them in reverse push order. */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			pos -= 4;
		}

		if (pos)
			x86_lea_membase (code, X86_ESP, X86_EBP, pos);

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_pop_reg (code, X86_ESI);
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_pop_reg (code, X86_EDI);
		}
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_pop_reg (code, X86_EBX);
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = get_call_info (cfg->mempool, sig, FALSE);
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		/* The vtype is returned in (up to) two 4-byte quads. */
		for (quad = 0; quad < 2; quad ++) {
			switch (cinfo->ret.pair_storage [quad]) {
			case ArgInIReg:
				x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
				break;
			case ArgOnFloatFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
				break;
			case ArgOnDoubleFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	x86_leave (code);

	if (CALLCONV_IS_STDCALL (sig)) {
		/* stdcall: the callee pops its own stack arguments. */
		MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

		stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
	} else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
		/* Pop the hidden vtype-return address pushed by the caller. */
		stack_to_pop = 4;
	else
		stack_to_pop = 0;

	if (stack_to_pop)
		x86_ret_imm (code, stack_to_pop);
	else
		x86_ret (code);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3883
/*
 * mono_arch_emit_exceptions:
 *
 *   Emit the out-of-line throw sequences for every MONO_PATCH_INFO_EXC
 * recorded during compilation, and patch the in-line branches to jump
 * to them.  Throw sequences for the same exception class are shared:
 * later sites only push their own IP offset and jump to the shared code.
 */
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	/* Cache of up to 16 already-emitted throw sequences, for sharing. */
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size;
	int exc_count = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			exc_count++;
	}

	/* 
	 * make sure we have enough space for exceptions
	 * 16 is the size of two push_imm instructions and a call
	 */
	if (cfg->compile_aot)
		code_size = exc_count * 32;
	else
		code_size = exc_count * 16;

	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			/* Point the in-line branch at the sequence emitted below. */
			x86_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				/* Reuse it: push this site's IP offset, jump to the
				 * shared sequence, and drop the patch entry. */
				x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				guint32 size;

				/* Compute size of code following the push <OFFSET> */
				size = 5 + 5;

				/* The IP offset pushed here is only known after the call
				 * below is emitted, so emit a placeholder push now and
				 * backpatch it (via buf) afterwards.  Use the 2-byte
				 * push imm8 form when the offset is known to fit. */
				if ((code - cfg->native_code) - throw_ip < 126 - size) {
					/* Use the shorter form */
					buf = buf2 = code;
					x86_push_imm (code, 0);
				}
				else {
					buf = code;
					x86_push_imm (code, 0xf0f0f0f0);
					buf2 = code;
				}

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				/* push the type token, then call the throw helper. */
				x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;
				x86_call_code (code, 0);
				/* Backpatch the placeholder push with the real offset,
				 * nop-padding any leftover bytes of the long form. */
				x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3989
3990 void
3991 mono_arch_flush_icache (guint8 *code, gint size)
3992 {
3993         /* not needed */
3994 }
3995
void
mono_arch_flush_register_windows (void)
{
	/* Register windows are a SPARC concept; x86 has none, so this is a no-op. */
}
4000
4001 /*
4002  * Support for fast access to the thread-local lmf structure using the GS
4003  * segment register on NPTL + kernel 2.6.x.
4004  */
4005
4006 static gboolean tls_offset_inited = FALSE;
4007
4008 void
4009 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4010 {
4011         if (!tls_offset_inited) {
4012                 if (!getenv ("MONO_NO_TLS")) {
4013 #ifdef PLATFORM_WIN32
4014                         /* 
4015                          * We need to init this multiple times, since when we are first called, the key might not
4016                          * be initialized yet.
4017                          */
4018                         appdomain_tls_offset = mono_domain_get_tls_key ();
4019                         lmf_tls_offset = mono_get_jit_tls_key ();
4020                         thread_tls_offset = mono_thread_get_tls_key ();
4021
4022                         /* Only 64 tls entries can be accessed using inline code */
4023                         if (appdomain_tls_offset >= 64)
4024                                 appdomain_tls_offset = -1;
4025                         if (lmf_tls_offset >= 64)
4026                                 lmf_tls_offset = -1;
4027                         if (thread_tls_offset >= 64)
4028                                 thread_tls_offset = -1;
4029 #else
4030 #if MONO_XEN_OPT
4031                         optimize_for_xen = access ("/proc/xen", F_OK) == 0;
4032 #endif
4033                         tls_offset_inited = TRUE;
4034                         appdomain_tls_offset = mono_domain_get_tls_offset ();
4035                         lmf_tls_offset = mono_get_lmf_tls_offset ();
4036                         lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
4037                         thread_tls_offset = mono_thread_get_tls_offset ();
4038 #endif
4039                 }
4040         }               
4041 }
4042
4043 void
4044 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4045 {
4046 }
4047
4048 void
4049 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4050 {
4051         MonoCallInst *call = (MonoCallInst*)inst;
4052         CallInfo *cinfo = get_call_info (cfg->mempool, inst->signature, FALSE);
4053
4054         /* add the this argument */
4055         if (this_reg != -1) {
4056                 if (cinfo->args [0].storage == ArgInIReg) {
4057                         MonoInst *this;
4058                         MONO_INST_NEW (cfg, this, OP_MOVE);
4059                         this->type = this_type;
4060                         this->sreg1 = this_reg;
4061                         this->dreg = mono_regstate_next_int (cfg->rs);
4062                         mono_bblock_add_inst (cfg->cbb, this);
4063
4064                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
4065                 }
4066                 else {
4067                         MonoInst *this;
4068                         MONO_INST_NEW (cfg, this, OP_OUTARG);
4069                         this->type = this_type;
4070                         this->sreg1 = this_reg;
4071                         mono_bblock_add_inst (cfg->cbb, this);
4072                 }
4073         }
4074
4075         if (vt_reg != -1) {
4076                 MonoInst *vtarg;
4077
4078                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4079                         /*
4080                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4081                          * the stack. Save the address here, so the call instruction can
4082                          * access it.
4083                          */
4084                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4085                         vtarg->inst_destbasereg = X86_ESP;
4086                         vtarg->inst_offset = inst->stack_usage;
4087                         vtarg->sreg1 = vt_reg;
4088                         mono_bblock_add_inst (cfg->cbb, vtarg);
4089                 }
4090                 else if (cinfo->ret.storage == ArgInIReg) {
4091                         /* The return address is passed in a register */
4092                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
4093                         vtarg->sreg1 = vt_reg;
4094                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
4095                         mono_bblock_add_inst (cfg->cbb, vtarg);
4096
4097                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
4098                 } else {
4099                         MonoInst *vtarg;
4100                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4101                         vtarg->type = STACK_MP;
4102                         vtarg->sreg1 = vt_reg;
4103                         mono_bblock_add_inst (cfg->cbb, vtarg);
4104                 }
4105         }
4106 }
4107
#ifdef MONO_ARCH_HAVE_IMT

// Linear handler, the bsearch head compare is shorter
//[2 + 4] x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
//[1 + 1] x86_branch8(inst,cond,imm,is_signed)
//        x86_patch(ins,target)
//[1 + 5] x86_jump_mem(inst,mem)

/* Byte sizes of the sequences emitted by mono_arch_build_imt_thunk (),
 * used to precompute each entry's chunk_size before reserving code memory. */
#define CMP_SIZE 6        /* cmp $imm32, %reg (opcode + modrm + imm32) */
#define BR_SMALL_SIZE 2   /* jcc rel8 */
#define BR_LARGE_SIZE 5   /* NOTE(review): x86_branch32 emits a 0x0f-prefixed
                           * jcc + rel32 — presumably 6 bytes; confirm this
                           * constant against the emitter macros. */
#define JUMP_IMM_SIZE 6   /* jmp *mem (indirect through vtable slot) */
4120
4121 static int
4122 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
4123 {
4124         int i, distance = 0;
4125         for (i = start; i < target; ++i)
4126                 distance += imt_entries [i]->chunk_size;
4127         return distance;
4128 }
4129
4130 /*
4131  * LOCKING: called with the domain lock held
4132  */
4133 gpointer
4134 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count)
4135 {
4136         int i;
4137         int size = 0;
4138         guint8 *code, *start;
4139
4140         for (i = 0; i < count; ++i) {
4141                 MonoIMTCheckItem *item = imt_entries [i];
4142                 if (item->is_equals) {
4143                         if (item->check_target_idx) {
4144                                 if (!item->compare_done)
4145                                         item->chunk_size += CMP_SIZE;
4146                                 item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
4147                         } else {
4148                                 item->chunk_size += JUMP_IMM_SIZE;
4149                                 /* with assert below:
4150                                  * item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
4151                                  */
4152                         }
4153                 } else {
4154                         item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
4155                         imt_entries [item->check_target_idx]->compare_done = TRUE;
4156                 }
4157                 size += item->chunk_size;
4158         }
4159         code = mono_code_manager_reserve (domain->code_mp, size);
4160         start = code;
4161         for (i = 0; i < count; ++i) {
4162                 MonoIMTCheckItem *item = imt_entries [i];
4163                 item->code_target = code;
4164                 if (item->is_equals) {
4165                         if (item->check_target_idx) {
4166                                 if (!item->compare_done)
4167                                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4168                                 item->jmp_code = code;
4169                                 x86_branch8 (code, X86_CC_NE, 0, FALSE);
4170                                 x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
4171                         } else {
4172                                 /* enable the commented code to assert on wrong method */
4173                                 /*x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4174                                 item->jmp_code = code;
4175                                 x86_branch8 (code, X86_CC_NE, 0, FALSE);*/
4176                                 x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
4177                                 /*x86_patch (item->jmp_code, code);
4178                                 x86_breakpoint (code);
4179                                 item->jmp_code = NULL;*/
4180                         }
4181                 } else {
4182                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4183                         item->jmp_code = code;
4184                         if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
4185                                 x86_branch8 (code, X86_CC_GE, 0, FALSE);
4186                         else
4187                                 x86_branch32 (code, X86_CC_GE, 0, FALSE);
4188                 }
4189         }
4190         /* patch the branches to get to the target items */
4191         for (i = 0; i < count; ++i) {
4192                 MonoIMTCheckItem *item = imt_entries [i];
4193                 if (item->jmp_code) {
4194                         if (item->check_target_idx) {
4195                                 x86_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
4196                         }
4197                 }
4198         }
4199                 
4200         mono_stats.imt_thunks_size += code - start;
4201         g_assert (code - start <= size);
4202         return start;
4203 }
4204
4205 MonoMethod*
4206 mono_arch_find_imt_method (gpointer *regs, guint8 *code)
4207 {
4208         return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
4209 }
4210
4211 MonoObject*
4212 mono_arch_find_this_argument (gpointer *regs, MonoMethod *method)
4213 {
4214         MonoMethodSignature *sig = mono_method_signature (method);
4215         CallInfo *cinfo = get_call_info (NULL, sig, FALSE);
4216         int this_argument_offset;
4217         MonoObject *this_argument;
4218
4219         /* 
4220          * this is the offset of the this arg from esp as saved at the start of 
4221          * mono_arch_create_trampoline_code () in tramp-x86.c.
4222          */
4223         this_argument_offset = 5;
4224         if (MONO_TYPE_ISSTRUCT (sig->ret) && (cinfo->ret.storage == ArgOnStack))
4225                 this_argument_offset++;
4226
4227         this_argument = * (MonoObject**) (((guint8*) regs [X86_ESP]) + this_argument_offset * sizeof (gpointer));
4228
4229         g_free (cinfo);
4230         return this_argument;
4231 }
4232 #endif
4233
4234 MonoInst*
4235 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4236 {
4237         MonoInst *ins = NULL;
4238
4239         if (cmethod->klass == mono_defaults.math_class) {
4240                 if (strcmp (cmethod->name, "Sin") == 0) {
4241                         MONO_INST_NEW (cfg, ins, OP_SIN);
4242                         ins->inst_i0 = args [0];
4243                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4244                         MONO_INST_NEW (cfg, ins, OP_COS);
4245                         ins->inst_i0 = args [0];
4246                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4247                         MONO_INST_NEW (cfg, ins, OP_TAN);
4248                         ins->inst_i0 = args [0];
4249                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4250                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4251                         ins->inst_i0 = args [0];
4252                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4253                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4254                         ins->inst_i0 = args [0];
4255                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4256                         MONO_INST_NEW (cfg, ins, OP_ABS);
4257                         ins->inst_i0 = args [0];
4258                 }
4259 #if 0
4260                 /* OP_FREM is not IEEE compatible */
4261                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4262                         MONO_INST_NEW (cfg, ins, OP_FREM);
4263                         ins->inst_i0 = args [0];
4264                         ins->inst_i1 = args [1];
4265                 }
4266 #endif
4267         } else if (cmethod->klass == mono_defaults.thread_class &&
4268                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
4269                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
4270         } else if(cmethod->klass->image == mono_defaults.corlib &&
4271                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4272                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4273
4274                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4275                         MonoInst *ins_iconst;
4276
4277                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4278                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4279                         ins_iconst->inst_c0 = 1;
4280
4281                         ins->inst_i0 = args [0];
4282                         ins->inst_i1 = ins_iconst;
4283                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4284                         MonoInst *ins_iconst;
4285
4286                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4287                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4288                         ins_iconst->inst_c0 = -1;
4289
4290                         ins->inst_i0 = args [0];
4291                         ins->inst_i1 = ins_iconst;
4292                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4293                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4294
4295                         ins->inst_i0 = args [0];
4296                         ins->inst_i1 = args [1];
4297                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4298                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4299
4300                         ins->inst_i0 = args [0];
4301                         ins->inst_i1 = args [1];
4302                 }
4303         }
4304
4305         return ins;
4306 }
4307
4308
4309 gboolean
4310 mono_arch_print_tree (MonoInst *tree, int arity)
4311 {
4312         return 0;
4313 }
4314
4315 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4316 {
4317         MonoInst* ins;
4318         
4319         if (appdomain_tls_offset == -1)
4320                 return NULL;
4321
4322         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4323         ins->inst_offset = appdomain_tls_offset;
4324         return ins;
4325 }
4326
4327 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4328 {
4329         MonoInst* ins;
4330
4331         if (thread_tls_offset == -1)
4332                 return NULL;
4333
4334         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4335         ins->inst_offset = thread_tls_offset;
4336         return ins;
4337 }
4338
/*
 * mono_arch_get_patch_offset:
 *
 *   Return the offset (in bytes) from CODE to the patchable immediate or
 * displacement of the instruction sequence starting at CODE. Asserts on an
 * unrecognized sequence.
 */
guint32
mono_arch_get_patch_offset (guint8 *code)
{
	if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
		/* mov <OFFSET>(<REG>), <REG>: disp32 follows opcode + modrm */
		return 2;
	else if ((code [0] == 0xba))
		/* mov $IMM32, %edx */
		return 1;
	else if ((code [0] == 0x68))
		/* push IMM */
		return 1;
	else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
		/* push <OFFSET>(<REG>) */
		return 2;
	else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
		/* call *<OFFSET>(<REG>) */
		return 2;
	else if ((code [0] == 0xdd) || (code [0] == 0xd9))
		/* fldl <ADDR> */
		return 2;
	else if ((code [0] == 0x58) && (code [1] == 0x05))
		/* pop %eax; add <OFFSET>, %eax */
		return 2;
	else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
		/* pop <REG>; add <OFFSET>, <REG> */
		return 3;
	else {
		g_assert_not_reached ();
		return -1;
	}
}
4369
/*
 * mono_arch_get_vcall_slot_addr:
 *
 *   Given CODE, the return address of an indirect call, and REGS, the saved
 * register state, disassemble the bytes preceding CODE to locate the memory
 * slot the call went through, and return its address. Returns NULL when the
 * call site is a direct call (or is not recognized).
 */
gpointer*
mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
{
	guint8 reg = 0;
	gint32 disp = 0;

	/* go to the start of the call instruction
	 *
	 * address_byte = (m << 6) | (o << 3) | reg
	 * call opcode: 0xff address_byte displacement
	 * 0xff m=1,o=2 imm8
	 * 0xff m=2,o=2 imm32
	 */
	code -= 6;

	/* 
	 * A given byte sequence can match more than case here, so we have to be
	 * really careful about the ordering of the cases. Longer sequences
	 * come first.
	 */
	if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
		/*
		 * This is an interface call
		 * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
		 * ff 10                   call   *(%eax)
		 */
		reg = x86_modrm_rm (code [5]);
		disp = 0;
#ifdef MONO_ARCH_HAVE_IMT
	} else if ((code [-1] == 0xba) && (code [4] == 0xff) && (x86_modrm_mod (code [5]) == 1) && (x86_modrm_reg (code [5]) == 2) && ((signed char)code [6] < 0)) {
		/* IMT-based interface calls: with MONO_ARCH_IMT_REG == edx
		 * ba 14 f8 28 08          mov    $0x828f814,%edx
		 * ff 50 fc                call   *0xfffffffc(%eax)
		 */
		reg = code [5] & 0x07;
		disp = (signed char)code [6];
#endif
	} else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
		/* call *disp8(<REG>); the code[1] != 0xe8 guard rules out a
		 * direct call whose imm32 happens to look like this pattern */
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
	} else {
		if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
			/* call *disp32(<REG>) */
			reg = code [1] & 0x07;
			disp = *((gint32*)(code + 2));
		} else if ((code [1] == 0xe8)) {
			/* direct call rel32: no vcall slot */
			return NULL;
		} else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
			/*
			 * This is a interface call
			 * 8b 40 30   mov    0x30(%eax),%eax
			 * ff 10      call   *(%eax)
			 */
			disp = 0;
			reg = code [5] & 0x07;
		}
		else
			return NULL;
	}

	return (gpointer*)(((gint32)(regs [reg])) + disp);
}
4431
4432 gpointer
4433 mono_arch_get_this_arg_from_call (MonoMethodSignature *sig, gssize *regs, guint8 *code)
4434 {
4435         guint32 esp = regs [X86_ESP];
4436         CallInfo *cinfo;
4437         gpointer res;
4438
4439         cinfo = get_call_info (NULL, sig, FALSE);
4440
4441         /*
4442          * The stack looks like:
4443          * <other args>
4444          * <this=delegate>
4445          * <possible vtype return address>
4446          * <return addr>
4447          * <4 pointers pushed by mono_arch_create_trampoline_code ()>
4448          */
4449         res = (((MonoObject**)esp) [5 + (cinfo->args [0].offset / 4)]);
4450         g_free (cinfo);
4451         return res;
4452 }
4453
/*
 * mono_arch_get_delegate_invoke_impl:
 *
 *   Emit a small trampoline which implements delegate invocation for SIG.
 * When HAS_TARGET, the trampoline replaces the delegate on the stack with
 * delegate->target and tail-jumps to delegate->method_ptr. Returns the code
 * start, or NULL when no fast path is available for this signature.
 */
gpointer
mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
{
	guint8 *code, *start;
	MonoDomain *domain = mono_domain_get ();

	/* FIXME: Support more cases */
	if (MONO_TYPE_ISSTRUCT (sig->ret))
		return NULL;

	/*
	 * The stack contains:
	 * <delegate>
	 * <return addr>
	 */

	if (has_target) {
		mono_domain_lock (domain);
		start = code = mono_code_manager_reserve (domain->code_mp, 64);
		mono_domain_unlock (domain);

		/* Replace the this argument with the target */
		x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
		x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
		x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
		x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));

		g_assert ((code - start) < 64);
	} else {
		if (sig->param_count == 0) {
			mono_domain_lock (domain);
			start = code = mono_code_manager_reserve (domain->code_mp, 32 + (sig->param_count * 8));
			mono_domain_unlock (domain);
		
			/* No arguments: just tail-jump to the delegate's method pointer */
			x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
			x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
		} else {
			/* 
			 * The code below does not work in the presence of exceptions, since it 
			 * creates a new frame.
			 */
			start = NULL;
#if 0
			for (i = 0; i < sig->param_count; ++i)
				if (!mono_is_regsize_var (sig->params [i]))
					return NULL;

			mono_domain_lock (domain);
			start = code = mono_code_manager_reserve (domain->code_mp, 32 + (sig->param_count * 8));
			mono_domain_unlock (domain);

			/* Load this == delegate */
			x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);

			/* Push arguments in opposite order, taking changes in ESP into account */
			for (i = 0; i < sig->param_count; ++i)
				x86_push_membase (code, X86_ESP, 4 + (sig->param_count * 4));

			/* Call the delegate */
			x86_call_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
			if (sig->param_count > 0)
				x86_alu_reg_imm (code, X86_ADD, X86_ESP, sig->param_count * 4);
			x86_ret (code);
#endif
		}
	}

	return start;
}