e4d3ef858b479c0fce07d0b21599a235717993a5
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14
15 #ifndef PLATFORM_WIN32
16 #include <unistd.h>
17 #include <sys/mman.h>
18 #endif
19
20 #include <mono/metadata/appdomain.h>
21 #include <mono/metadata/debug-helpers.h>
22 #include <mono/metadata/threads.h>
23 #include <mono/metadata/profiler-private.h>
24 #include <mono/utils/mono-math.h>
25
26 #include "trace.h"
27 #include "mini-x86.h"
28 #include "inssel.h"
29 #include "cpu-pentium.h"
30
31 /* On windows, these hold the key returned by TlsAlloc () */
32 static gint lmf_tls_offset = -1;
33 static gint appdomain_tls_offset = -1;
34 static gint thread_tls_offset = -1;
35
36 #define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
37
38 #ifdef PLATFORM_WIN32
39 /* Under windows, the default pinvoke calling convention is stdcall */
40 #define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
41 #else
42 #define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
43 #endif
44
45 #define SIGNAL_STACK_SIZE (64 * 1024)
46
47 #define NOT_IMPLEMENTED g_assert_not_reached ()
48
49 const char*
50 mono_arch_regname (int reg) {
51         switch (reg) {
52         case X86_EAX: return "%eax";
53         case X86_EBX: return "%ebx";
54         case X86_ECX: return "%ecx";
55         case X86_EDX: return "%edx";
56         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
57         case X86_EDI: return "%edi";
58         case X86_ESI: return "%esi";
59         }
60         return "unknown";
61 }
62
/*
 * mono_arch_fregname:
 *
 *   x86 keeps floats on the x87 stack rather than in named registers,
 * so there is no per-register name to report; always answer "unknown".
 */
const char*
mono_arch_fregname (int reg) {
	return "unknown";
}
67
/* Where a call argument or return value lives, as decided by get_call_info (). */
typedef enum {
	ArgInIReg,             /* in an integer register */
	ArgInFloatSSEReg,      /* in an SSE register (unused while FLOAT_PARAM_REGS is 0) */
	ArgInDoubleSSEReg,
	ArgOnStack,            /* at a stack offset */
	ArgValuetypeInReg,     /* small struct split across registers/fp stack (see pair_storage) */
	ArgOnFloatFpStack,     /* single-precision value on the x87 fp stack */
	ArgOnDoubleFpStack,    /* double-precision value on the x87 fp stack */
	ArgNone                /* no value (e.g. void return) */
} ArgStorage;
78
/* Placement of a single argument or return value. */
typedef struct {
	gint16 offset;       /* stack offset in bytes, valid when storage == ArgOnStack */
	gint8  reg;          /* register number, valid for the register storage kinds */
	ArgStorage storage;  /* which of the above applies */

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];  /* placement of the low/high halves */
	gint8 pair_regs [2];          /* registers for halves stored ArgInIReg */
} ArgInfo;
88
/*
 * Full description of a call's argument passing, computed by
 * get_call_info () and released with g_free ().
 */
typedef struct {
	int nargs;                  /* argument count */
	guint32 stack_usage;        /* total stack bytes occupied by the arguments */
	guint32 reg_usage;          /* integer argument registers consumed */
	guint32 freg_usage;         /* float argument registers consumed */
	gboolean need_stack_align;
	ArgInfo ret;                /* placement of the return value */
	ArgInfo sig_cookie;         /* placement of the vararg signature cookie */
	ArgInfo args [1];           /* flexible: one entry per 'this' + formal argument */
} CallInfo;
99
/* No arguments are passed in registers on x86; everything goes on the stack. */
#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

/* Never indexed while PARAM_REGS is 0; kept so add_general () compiles. */
static X86_Reg_No param_regs [] = { 0 };

#ifdef PLATFORM_WIN32
/* Register pair used to return small structs on win32 (see add_valuetype ()). */
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
109
110 static void inline
111 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
112 {
113     ainfo->offset = *stack_size;
114
115     if (*gr >= PARAM_REGS) {
116                 ainfo->storage = ArgOnStack;
117                 (*stack_size) += sizeof (gpointer);
118     }
119     else {
120                 ainfo->storage = ArgInIReg;
121                 ainfo->reg = param_regs [*gr];
122                 (*gr) ++;
123     }
124 }
125
126 static void inline
127 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
128 {
129         ainfo->offset = *stack_size;
130
131         g_assert (PARAM_REGS == 0);
132         
133         ainfo->storage = ArgOnStack;
134         (*stack_size) += sizeof (gpointer) * 2;
135 }
136
137 static void inline
138 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
139 {
140     ainfo->offset = *stack_size;
141
142     if (*gr >= FLOAT_PARAM_REGS) {
143                 ainfo->storage = ArgOnStack;
144                 (*stack_size) += sizeof (gpointer);
145     }
146     else {
147                 /* A double register */
148                 if (is_double)
149                         ainfo->storage = ArgInDoubleSSEReg;
150                 else
151                         ainfo->storage = ArgInFloatSSEReg;
152                 ainfo->reg = *gr;
153                 (*gr) += 1;
154     }
155 }
156
157
/*
 * add_valuetype:
 *
 *   Decide the placement of a valuetype argument or return value of TYPE.
 * On win32, small pinvoke struct returns come back in registers or on the
 * fp stack; everything else is placed on the stack, rounded up to pointer
 * size.  GR and FR are accepted to mirror the other add_* helpers but are
 * not used here.
 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	/* pinvoke structs use their native (marshalled) size */
	if (sig->pinvoke)
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef PLATFORM_WIN32
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}
		/* 1/2/4 byte structs in EAX, 8 byte structs in EAX:EDX */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	/* default: pass the whole struct on the stack, pointer aligned */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
213
/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For x86 ELF, see the "System V Application Binary Interface Intel386
 * Architecture Processor Supplement, Fourth Edition" document for more
 * information.
 * For x86 win32, see ???.
 * The returned CallInfo is heap allocated; callers free it with g_free ().
 */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	/* zeroed, so untouched ArgInfo fields read as 0/ArgInIReg */
	cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mono_type_get_underlying_type (sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			/* 64 bit results come back in EAX:EDX; reg records the low half */
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24: returned via a hidden address argument */
			add_general (&gr, &stack_size, &cinfo->ret);
			;
			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	/* vararg call with no fixed arguments: cookie comes first */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/* 
			 * Prevent implicit arguments + the sig cookie from being passed 
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		/* byref arguments are passed as a pointer regardless of pointee type */
		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mono_type_get_underlying_type (sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_VALUETYPE:
			add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	/* sentinel sits after the last fixed argument: cookie goes last */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
390
/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries. 
 *
 * Returns the size of the activation frame.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, align, pad;
	int offset = 8;   /* skip saved EBP + return address */
	CallInfo *cinfo;

	cinfo = get_call_info (csig, FALSE);

	/* hidden return-buffer pointer for struct returns passed on the stack */
	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	/* entry 0 describes the implicit arguments (ret buffer + this) */
	arg_info [0].size = frame_size;

	for (k = 0; k < param_count; k++) {
		
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else
			size = mono_type_stack_size (csig->params [k], &align);

		/* ignore alignment for now */
		align = 1;

		/* pad the previous entry up to this argument's alignment */
		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	/* trailing pad to bring the whole frame to MONO_ARCH_FRAME_ALIGNMENT */
	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return frame_size;
}
454
/*
 * Raw machine code for a cdecl function
 *   void cpuid_impl (int id, int *p_eax, int *p_ebx, int *p_ecx, int *p_edx)
 * that executes CPUID with EAX = id and stores the four result registers
 * through the given pointers.  It is copied into executable memory at run
 * time by cpuid () below.
 */
static const guchar cpuid_impl [] = {
	0x55,                           /* push   %ebp */
	0x89, 0xe5,                     /* mov    %esp,%ebp */
	0x53,                           /* push   %ebx */
	0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,                     /* cpuid   */
	0x50,                           /* push   %eax */
	0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
	0x89, 0x18,                     /* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
	0x89, 0x08,                     /* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
	0x89, 0x10,                     /* mov    %edx,(%eax) */
	0x58,                           /* pop    %eax */
	0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
	0x89, 0x02,                     /* mov    %eax,(%edx) */
	0x5b,                           /* pop    %ebx */
	0xc9,                           /* leave   */
	0xc3,                           /* ret     */
};

/* Signature of the code blob above. */
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
477
/*
 * cpuid:
 *
 *   Run CPUID leaf ID and store EAX/EBX/ECX/EDX through the pointers.
 * Returns 1 on success, 0 if the CPU does not support CPUID.  Support is
 * probed by toggling EFLAGS bit 21 (0x200000, the ID flag) and checking
 * whether the change sticks.
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
		CpuidFunc func;
		void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
		memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));

		func = (CpuidFunc)ptr;
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		mono_code_manager_destroy (codeman);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
537
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	/* read the x87 control word, force the precision field to double (53 bit) */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	/* read back so the write above cannot be reordered/elided */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	_control87 (_PC_53, MCW_PC);
#endif
}
557
558 /*
559  * This function returns the optimizations supported on this cpu.
560  */
561 guint32
562 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
563 {
564         int eax, ebx, ecx, edx;
565         guint32 opts = 0;
566         
567         *exclude_mask = 0;
568         /* Feature Flags function, flags returned in EDX. */
569         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
570                 if (edx & (1 << 15)) {
571                         opts |= MONO_OPT_CMOV;
572                         if (edx & 1)
573                                 opts |= MONO_OPT_FCMOV;
574                         else
575                                 *exclude_mask |= MONO_OPT_FCMOV;
576                 } else
577                         *exclude_mask |= MONO_OPT_CMOV;
578         }
579         return opts;
580 }
581
/*
 * Determine whenever the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	/* opcode 0xf7 with mod == 3 and reg field == 7 is `idiv r32` */
	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG: fetch the divisor register's contents */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		/* dividing by -1 can only trap via INT_MIN / -1 overflow,
		 * as opposed to a division by zero */
		if (reg == -1)
			return TRUE;
	}
			
	return FALSE;
}
630
631 static gboolean
632 is_regsize_var (MonoType *t) {
633         if (t->byref)
634                 return TRUE;
635         switch (mono_type_get_underlying_type (t)->type) {
636         case MONO_TYPE_I4:
637         case MONO_TYPE_U4:
638         case MONO_TYPE_I:
639         case MONO_TYPE_U:
640         case MONO_TYPE_PTR:
641         case MONO_TYPE_FNPTR:
642                 return TRUE;
643         case MONO_TYPE_OBJECT:
644         case MONO_TYPE_STRING:
645         case MONO_TYPE_CLASS:
646         case MONO_TYPE_SZARRAY:
647         case MONO_TYPE_ARRAY:
648                 return TRUE;
649         case MONO_TYPE_VALUETYPE:
650                 return FALSE;
651         }
652         return FALSE;
653 }
654
655 GList *
656 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
657 {
658         GList *vars = NULL;
659         int i;
660
661         for (i = 0; i < cfg->num_varinfo; i++) {
662                 MonoInst *ins = cfg->varinfo [i];
663                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
664
665                 /* unused vars */
666                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
667                         continue;
668
669                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
670                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
671                         continue;
672
673                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
674                  * 8bit quantities in caller saved registers on x86 */
675                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
676                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
677                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
678                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
679                         g_assert (i == vmv->idx);
680                         vars = g_list_prepend (vars, vmv);
681                 }
682         }
683
684         vars = mono_varlist_sort (cfg, vars, 0);
685
686         return vars;
687 }
688
689 GList *
690 mono_arch_get_global_int_regs (MonoCompile *cfg)
691 {
692         GList *regs = NULL;
693
694         /* we can use 3 registers for global allocation */
695         regs = g_list_prepend (regs, (gpointer)X86_EBX);
696         regs = g_list_prepend (regs, (gpointer)X86_ESI);
697         regs = g_list_prepend (regs, (gpointer)X86_EDI);
698
699         return regs;
700 }
701
702 /*
703  * mono_arch_regalloc_cost:
704  *
705  *  Return the cost, in number of memory references, of the action of 
706  * allocating the variable VMV into a register during global register
707  * allocation.
708  */
709 guint32
710 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
711 {
712         MonoInst *ins = cfg->varinfo [vmv->idx];
713
714         if (cfg->method->save_lmf)
715                 /* The register is already saved */
716                 return (ins->opcode == OP_ARG) ? 1 : 0;
717         else
718                 /* push+pop+possible load if it is an argument */
719                 return (ins->opcode == OP_ARG) ? 3 : 2;
720 }
721  
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 *
 * Incoming arguments live at positive offsets from EBP (starting at 8,
 * past the saved EBP and the return address); saved registers and locals
 * live at negative offsets.  The total negative-side size ends up in
 * cfg->stack_offset, rounded to MONO_ARCH_FRAME_ALIGNMENT.
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset, curinst, size, align;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	/* first incoming slot: above saved EBP + return address */
	offset = 8;
	curinst = 0;

	cinfo = get_call_info (sig, FALSE);

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		/* hidden return-buffer pointer is the first incoming slot */
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = offset;
		offset += sizeof (gpointer);
		break;
	case ArgValuetypeInReg:
		/* handled below, once the local-area offset is known */
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->hasthis) {
		inst = cfg->varinfo [curinst];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		offset += sizeof (gpointer);
		curinst++;
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		/* remember where the signature cookie arrives */
		cfg->sig_cookie = offset;
		offset += sizeof (gpointer);
	}

	/* incoming formal arguments, each rounded up to 4 bytes */
	for (i = 0; i < sig->param_count; ++i) {
		inst = cfg->varinfo [curinst];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		size = mono_type_size (sig->params [i], &align);
		size += 4 - 1;
		size &= ~(4 - 1);
		offset += size;
		curinst++;
	}

	/* from here on, offset counts bytes BELOW the frame pointer */
	offset = 0;

	/* reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		/* round the local area start up to the locals' alignment */
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	g_free (offsets);
	offset += locals_stack_size;

	/* round the whole frame up to the required alignment */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	g_free (cinfo);

	cfg->frame_reg = MONO_ARCH_BASEREG;

	cfg->stack_offset = offset;
}
855
856 void
857 mono_arch_create_vars (MonoCompile *cfg)
858 {
859         MonoMethodSignature *sig;
860         CallInfo *cinfo;
861
862         sig = mono_method_signature (cfg->method);
863
864         cinfo = get_call_info (sig, FALSE);
865
866         if (cinfo->ret.storage == ArgValuetypeInReg)
867                 cfg->ret_var_is_local = TRUE;
868
869         g_free (cinfo);
870 }
871
/* FIXME: we need an alignment solution for enter_method and mono_arch_call_opcode;
 * currently the alignment in mono_arch_call_opcode is computed without arch_get_argument_info.
 */
875
876 /* 
877  * take the arguments and generate the arch-specific
878  * instructions to properly call the function in call.
879  * This includes pushing, moving arguments to the right register
880  * etc.
881  * Issue: who does the spilling if needed, and when?
882  */
MonoCallInst*
mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n, stack_size, type;
	MonoType *ptype;
	CallInfo *cinfo;

	stack_size = 0;
	/* add the vararg cookie before the non-implicit args */
	if (call->signature->call_convention == MONO_CALL_VARARG) {
		MonoInst *sig_arg;
		/* FIXME: Add support for signature tokens to AOT */
		cfg->disable_aot = TRUE;
		MONO_INST_NEW (cfg, arg, OP_OUTARG);
		/* the cookie is the signature itself, pushed as a pointer constant */
		MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
		sig_arg->inst_p0 = call->signature;
		arg->inst_left = sig_arg;
		arg->type = STACK_PTR;
		/* prepend, so they get reversed */
		arg->next = call->out_args;
		call->out_args = arg;
		stack_size += sizeof (gpointer);
	}
	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (sig, FALSE);

	/* account for the hidden valuetype-return address pushed by the caller */
	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		if (cinfo->ret.storage == ArgOnStack)
			stack_size += sizeof (gpointer);
	}

	for (i = 0; i < n; ++i) {
		if (is_virtual && i == 0) {
			/* the argument will be attached to the call instruction */
			in = call->args [i];
			stack_size += 4;
		} else {
			/* wrap each argument tree in an OP_OUTARG* node */
			MONO_INST_NEW (cfg, arg, OP_OUTARG);
			in = call->args [i];
			arg->cil_code = in->cil_code;
			arg->inst_left = in;
			arg->type = in->type;
			/* prepend, so they get reversed */
			arg->next = call->out_args;
			call->out_args = arg;
			if (i >= sig->hasthis) {
				MonoType *t = sig->params [i - sig->hasthis];
				ptype = mono_type_get_underlying_type (t);
				/* byrefs are passed as plain native-sized pointers */
				if (t->byref)
					type = MONO_TYPE_U;
				else
					type = ptype->type;
				/* FIXME: validate arguments... */
				switch (type) {
				case MONO_TYPE_I:
				case MONO_TYPE_U:
				case MONO_TYPE_BOOLEAN:
				case MONO_TYPE_CHAR:
				case MONO_TYPE_I1:
				case MONO_TYPE_U1:
				case MONO_TYPE_I2:
				case MONO_TYPE_U2:
				case MONO_TYPE_I4:
				case MONO_TYPE_U4:
				case MONO_TYPE_STRING:
				case MONO_TYPE_CLASS:
				case MONO_TYPE_OBJECT:
				case MONO_TYPE_PTR:
				case MONO_TYPE_FNPTR:
				case MONO_TYPE_ARRAY:
				case MONO_TYPE_SZARRAY:
					/* everything word-sized takes one 4-byte stack slot */
					stack_size += 4;
					break;
				case MONO_TYPE_I8:
				case MONO_TYPE_U8:
					stack_size += 8;
					break;
				case MONO_TYPE_R4:
					stack_size += 4;
					arg->opcode = OP_OUTARG_R4;
					break;
				case MONO_TYPE_R8:
					stack_size += 8;
					arg->opcode = OP_OUTARG_R8;
					break;
				case MONO_TYPE_VALUETYPE: {
					int size;
					/* pinvoke uses the native (marshalled) layout, managed calls the CLI layout */
					if (sig->pinvoke) 
						size = mono_type_native_stack_size (&in->klass->byval_arg, NULL);
					else 
						size = mono_type_stack_size (&in->klass->byval_arg, NULL);

					stack_size += size;
					arg->opcode = OP_OUTARG_VT;
					arg->klass = in->klass;
					/* unused carries the pinvoke flag for the VT push code */
					arg->unused = sig->pinvoke;
					arg->inst_imm = size; 
					break;
				}
				case MONO_TYPE_TYPEDBYREF:
					stack_size += sizeof (MonoTypedRef);
					arg->opcode = OP_OUTARG_VT;
					arg->klass = in->klass;
					arg->unused = sig->pinvoke;
					arg->inst_imm = sizeof (MonoTypedRef); 
					break;
				default:
					g_error ("unknown type 0x%02x in mono_arch_call_opcode\n", type);
				}
			} else {
				/* the this argument */
				stack_size += 4;
			}
		}
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			MonoInst *zero_inst;
			/*
			 * After the call, the struct is in registers, but needs to be saved to the memory pointed
			 * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
			 * before calling the function. So we add a dummy instruction to represent pushing the 
			 * struct return address to the stack. The return address will be saved to this stack slot 
			 * by the code emitted in this_vret_args.
			 */
			MONO_INST_NEW (cfg, arg, OP_OUTARG);
			MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
			zero_inst->inst_p0 = 0;
			arg->inst_left = zero_inst;
			arg->type = STACK_PTR;
			/* prepend, so they get reversed */
			arg->next = call->out_args;
			call->out_args = arg;
		}
		else
			/* if the function returns a struct, the called method already does a ret $0x4 */
			/* NOTE(review): this inner condition duplicates the enclosing one and is
			 * therefore always true at this point — presumably a leftover; confirm. */
			if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
				stack_size -= 4;
	}

	call->stack_usage = stack_size;
	g_free (cinfo);

	/* 
	 * should set more info in call, such as the stack space
	 * used by the args that needs to be added back to esp
	 */

	return call;
}
1037
1038 /*
1039  * Allow tracing to work with this interface (with an optional argument)
1040  */
1041 void*
1042 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1043 {
1044         guchar *code = p;
1045
1046         /* if some args are passed in registers, we need to save them here */
1047         x86_push_reg (code, X86_EBP);
1048
1049         if (cfg->compile_aot) {
1050                 x86_push_imm (code, cfg->method);
1051                 x86_mov_reg_imm (code, X86_EAX, func);
1052                 x86_call_reg (code, X86_EAX);
1053         } else {
1054                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1055                 x86_push_imm (code, cfg->method);
1056                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1057                 x86_call_code (code, 0);
1058         }
1059         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1060
1061         return code;
1062 }
1063
/* Strategies used by mono_arch_instrument_epilog to preserve the method's
 * return value across the call to the tracing function. */
enum {
	SAVE_NONE,     /* void return: nothing to preserve */
	SAVE_STRUCT,   /* valuetype returned through a hidden pointer argument */
	SAVE_EAX,      /* 32 bit integer/pointer result in EAX */
	SAVE_EAX_EDX,  /* 64 bit result in the EAX:EDX pair */
	SAVE_FP        /* floating point result on the x87 stack */
};
1071
1072 void*
1073 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1074 {
1075         guchar *code = p;
1076         int arg_size = 0, save_mode = SAVE_NONE;
1077         MonoMethod *method = cfg->method;
1078         
1079         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1080         case MONO_TYPE_VOID:
1081                 /* special case string .ctor icall */
1082                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1083                         save_mode = SAVE_EAX;
1084                 else
1085                         save_mode = SAVE_NONE;
1086                 break;
1087         case MONO_TYPE_I8:
1088         case MONO_TYPE_U8:
1089                 save_mode = SAVE_EAX_EDX;
1090                 break;
1091         case MONO_TYPE_R4:
1092         case MONO_TYPE_R8:
1093                 save_mode = SAVE_FP;
1094                 break;
1095         case MONO_TYPE_VALUETYPE:
1096                 save_mode = SAVE_STRUCT;
1097                 break;
1098         default:
1099                 save_mode = SAVE_EAX;
1100                 break;
1101         }
1102
1103         switch (save_mode) {
1104         case SAVE_EAX_EDX:
1105                 x86_push_reg (code, X86_EDX);
1106                 x86_push_reg (code, X86_EAX);
1107                 if (enable_arguments) {
1108                         x86_push_reg (code, X86_EDX);
1109                         x86_push_reg (code, X86_EAX);
1110                         arg_size = 8;
1111                 }
1112                 break;
1113         case SAVE_EAX:
1114                 x86_push_reg (code, X86_EAX);
1115                 if (enable_arguments) {
1116                         x86_push_reg (code, X86_EAX);
1117                         arg_size = 4;
1118                 }
1119                 break;
1120         case SAVE_FP:
1121                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1122                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1123                 if (enable_arguments) {
1124                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1125                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1126                         arg_size = 8;
1127                 }
1128                 break;
1129         case SAVE_STRUCT:
1130                 if (enable_arguments) {
1131                         x86_push_membase (code, X86_EBP, 8);
1132                         arg_size = 4;
1133                 }
1134                 break;
1135         case SAVE_NONE:
1136         default:
1137                 break;
1138         }
1139
1140         if (cfg->compile_aot) {
1141                 x86_push_imm (code, method);
1142                 x86_mov_reg_imm (code, X86_EAX, func);
1143                 x86_call_reg (code, X86_EAX);
1144         } else {
1145                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1146                 x86_push_imm (code, method);
1147                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1148                 x86_call_code (code, 0);
1149         }
1150         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1151
1152         switch (save_mode) {
1153         case SAVE_EAX_EDX:
1154                 x86_pop_reg (code, X86_EAX);
1155                 x86_pop_reg (code, X86_EDX);
1156                 break;
1157         case SAVE_EAX:
1158                 x86_pop_reg (code, X86_EAX);
1159                 break;
1160         case SAVE_FP:
1161                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1162                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1163                 break;
1164         case SAVE_NONE:
1165         default:
1166                 break;
1167         }
1168
1169         return code;
1170 }
1171
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch with condition COND/SIGN to the target of INS
 * (either a label or a basic block). If the target already has a native
 * offset, branch to it directly; otherwise record a patch entry and reserve
 * an 8 bit displacement when MONO_OPT_BRANCH is on and the estimated distance
 * fits in a signed byte, else a 32 bit displacement.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1196
/* emit an exception if condition is fail */
/* Records a MONO_PATCH_INFO_EXC patch so the 32 bit branch is later pointed
 * at the throw code for EXC_NAME; the branch is taken when COND holds.
 * NOTE(review): the expansion ends with `while (0);` — the embedded semicolon
 * adds an empty statement at the call site and would break the macro inside
 * an unbraced if/else; check the call sites before removing it. */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
        do {                                                        \
                mono_add_patch_info (cfg, code - cfg->native_code,   \
                                    MONO_PATCH_INFO_EXC, exc_name);  \
                x86_branch32 (code, cond, 0, signed);               \
        } while (0); 
1204
/* Compare and pop the two values on top of the x87 stack (fcompp), then
 * store the FPU status word into AX (fnstsw) so the condition bits can be
 * tested with integer instructions. */
#define EMIT_FPCOMPARE(code) do { \
        x86_fcompp (code); \
        x86_fnstsw (code); \
} while (0); 
1209
1210
1211 static guint8*
1212 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1213 {
1214         if (cfg->compile_aot) {
1215                 guint32 got_reg = X86_EAX;
1216
1217                 if (cfg->compile_aot) {          
1218                         /*
1219                          * Since the patches are generated by the back end, there is
1220                          * no way to generate a got_var at this point.
1221                          */
1222                         g_assert (cfg->got_var);
1223
1224                         if (cfg->got_var->opcode == OP_REGOFFSET)
1225                                 x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
1226                         else
1227                                 got_reg = cfg->got_var->dreg;
1228                 }
1229
1230                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1231                 x86_call_membase (code, got_reg, 0xf0f0f0f0);
1232         }
1233         else {
1234                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1235                 x86_call_code (code, 0);
1236         }
1237
1238         return code;
1239 }
1240
/* FIXME: Add more instructions */
/* TRUE if INS does not read the x86 condition flags, so a flag-clobbering
 * replacement (e.g. xor reg,reg for loading 0) can be emitted before it. */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1243
/*
 * peephole_pass:
 *
 *   Run simple local optimizations over the instruction list of BB by
 * pattern matching each instruction (and its predecessor last_ins):
 * constant-zero loads become xor, redundant multiplications, loads, moves
 * and compares are rewritten or unlinked. bb->last_ins is refreshed at the
 * end since instructions may have been removed.
 */
static void
peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *last_ins = NULL;
	ins = bb->code;

	while (ins) {

		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we cant do it always */
			if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
				ins->opcode = CEE_XOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;
			}
			break;
		case OP_MUL_IMM: 
			/* remove unnecessary multiplication with 1 */
			if (ins->inst_imm == 1) {
				if (ins->dreg != ins->sreg1) {
					ins->opcode = OP_MOVE;
				} else {
					/* NOTE(review): last_ins is dereferenced without a NULL
					 * check; presumably a mul-by-1 into itself can never be
					 * the first instruction of a bb — confirm (the OP_MOVE
					 * case below does guard against NULL). */
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				}
			}
			break;
		case OP_COMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0) 
			 * --> 
			 * OP_X86_TEST_NULL (reg) 
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/* 
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
					ins->opcode = OP_COMPARE_IMM;
					ins->sreg1 = last_ins->sreg1;

					/* check if we can remove cmp reg,0 with test null */
					if (!ins->inst_imm)
						ins->opcode = OP_X86_TEST_NULL;
				}

			break;
		case OP_LOAD_MEMBASE:
		case OP_LOADI4_MEMBASE:
			/* 
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
					 || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}

			/* 
			 * Note: reg1 must be different from the basereg in the second load
			 * Note: if reg1 = reg2 is equal then second load is removed
			 *
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_MOVE reg1, reg2
			 */
			/* NOTE(review): there is no `else` before the following `if` — harmless
			 * because the opcode tests are mutually exclusive with the branch above,
			 * but it reads like a typo; confirm intent. */
			} if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
					   || last_ins->opcode == OP_LOAD_MEMBASE) &&
			      ins->inst_basereg != last_ins->dreg &&
			      ins->inst_basereg == last_ins->inst_basereg &&
			      ins->inst_offset == last_ins->inst_offset) {

				if (ins->dreg == last_ins->dreg) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->dreg;
				}

				//g_assert_not_reached ();

#if 0
			/* 
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg
			 * -->
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
			 * OP_ICONST reg, imm
			 */
			} else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
						|| last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
				   ins->inst_basereg == last_ins->inst_destbasereg &&
				   ins->inst_offset == last_ins->inst_offset) {
				//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
				ins->opcode = OP_ICONST;
				ins->inst_c0 = last_ins->inst_imm;
				g_assert_not_reached (); // check this rule
#endif
			}
			break;
		case OP_LOADU1_MEMBASE:
		case OP_LOADI1_MEMBASE:
			/* 
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * CONV_I2/U2 reg1, reg2
			 */
			if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
				(last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
					ins->inst_basereg == last_ins->inst_destbasereg &&
					ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		case OP_LOADU2_MEMBASE:
		case OP_LOADI2_MEMBASE:
			/* 
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * CONV_I2/U2 reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
					ins->inst_basereg == last_ins->inst_destbasereg &&
					ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		case CEE_CONV_I4:
		case CEE_CONV_U4:
		case OP_MOVE:
			/*
			 * Removes:
			 *
			 * OP_MOVE reg, reg 
			 */
			if (ins->dreg == ins->sreg1) {
				if (last_ins)
					last_ins->next = ins->next;
				ins = ins->next;
				continue;
			}
			/* 
			 * Removes:
			 *
			 * OP_MOVE sreg, dreg 
			 * OP_MOVE dreg, sreg
			 */
			if (last_ins && last_ins->opcode == OP_MOVE &&
			    ins->sreg1 == last_ins->dreg &&
			    ins->dreg == last_ins->sreg1) {
				last_ins->next = ins->next;
				ins = ins->next;
				continue;
			}
			break;
			
		case OP_X86_PUSH_MEMBASE:
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
					 last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				    ins->opcode = OP_X86_PUSH;
				    ins->sreg1 = last_ins->sreg1;
			}
			break;
		}
		last_ins = ins;
		ins = ins->next;
	}
	bb->last_ins = last_ins;
}
1452
/* x86 condition codes used when emitting compare-and-branch sequences.
 * NOTE(review): presumably indexed by opcode relative to a base branch/set
 * opcode (eq/ge/gt/le/lt in signed then unsigned form, followed by
 * overflow/carry variants) — confirm the base at the use sites. */
static const int 
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};
1459
1460 static const char*const * ins_spec = pentium_desc;
1461
1462 /*#include "cprop.c"*/
/*
 * mono_arch_local_regalloc:
 *
 *   Perform basic-block-local register allocation for BB by delegating to
 * the generic local allocator.
 */
void
mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
{
	mono_local_regalloc (cfg, bb);
}
1468
/*
 * emit_float_to_int:
 *
 *   Emit code converting the value on top of the x87 stack to a SIZE byte
 * integer loaded into DREG. The FPU control word is temporarily switched to
 * round-toward-zero (truncation) as CLI conversion semantics require, then
 * restored. Sub-word results are sign- or zero-extended per IS_SIGNED.
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
	/* save the current control word on the stack and set RC bits (0xc00 = truncate) */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		/* NOTE(review): only the low dword is popped here, which appears to
		 * leave ESP off by 4 and the restore below reading the wrong slot —
		 * consistent with the FIXME; presumably this path is not reached. */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register 
		 * x86_pop_reg (code, dreg_high);
		 */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	/* restore the original control word and release its stack slot */
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	/* widen 1 and 2 byte results to a full register */
	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
1499
/*
 * mono_emit_stack_alloc:
 *
 *   Emit native code implementing localloc: subtract the byte count held in
 * TREE->sreg1 from ESP, and, when MONO_INST_INIT is set in tree->flags,
 * zero-initialize the freshly allocated area.  Returns the updated code
 * pointer.
 *
 *   On platforms where stack growth relies on a guard page (win32, or when
 * SIGSEGV is delivered on an alternate signal stack), large allocations must
 * be performed one 4K page at a time, touching each page so the OS commits
 * it; jumping ESP far below the guard page would fault unrecoverably.
 */
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];

		/*
		 * Under Windows:
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently commited.
		 * Stack normally grows sequentially so OS traps access to the
		 * guard page and commits more pages when needed.
		 */
		/* Fast path: sizes below one page (4K) skip the probe loop entirely */
		x86_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
		/* touch the new page so the OS commits it */
		x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

		/* 
		 * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
		 * that follows only initializes the last part of the area.
		 */
		/* Same as the init code below with size==0x1000 */
		if (tree->flags & MONO_INST_INIT) {
			/* rep stosl clobbers EAX (fill value), ECX (count), EDI (dest) */
			x86_push_reg (code, X86_EAX);
			x86_push_reg (code, X86_ECX);
			x86_push_reg (code, X86_EDI);
			x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
			x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
			/* skip the 12 bytes of saved registers above */
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_stosl (code);
			x86_pop_reg (code, X86_EDI);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_EAX);
		}

		/* loop while at least one full page remains to be allocated */
		x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		x86_patch (br[3], br[2]);
		/* allocate the sub-page remainder, if any */
		x86_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		/* fast-path target: a single sub-page allocation */
		x86_patch (br[0], code);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
		x86_patch (br[1], code);
		x86_patch (br[4], code);
	}
	else
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

	/*
	 * Zero the allocated area (after the probe loop, sreg holds only the
	 * residual size < 0x1000, so this initializes just the last chunk).
	 * Save whichever of EAX/ECX/EDI is not the dest or size register.
	 */
	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;   /* bytes pushed below; EDI must point past them */
		if (tree->dreg != X86_EAX && sreg != X86_EAX) {
			x86_push_reg (code, X86_EAX);
			offset += 4;
		}
		if (tree->dreg != X86_ECX && sreg != X86_ECX) {
			x86_push_reg (code, X86_ECX);
			offset += 4;
		}
		if (tree->dreg != X86_EDI && sreg != X86_EDI) {
			x86_push_reg (code, X86_EDI);
			offset += 4;
		}
		
		/* dword count = byte count / 4 */
		x86_shift_reg_imm (code, X86_SHR, sreg, 2);
		if (sreg != X86_ECX)
			x86_mov_reg_reg (code, X86_ECX, sreg, 4);
		x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
				
		x86_lea_membase (code, X86_EDI, X86_ESP, offset);
		x86_cld (code);
		x86_prefix (code, X86_REP_PREFIX);
		x86_stosl (code);
		
		if (tree->dreg != X86_EDI && sreg != X86_EDI)
			x86_pop_reg (code, X86_EDI);
		if (tree->dreg != X86_ECX && sreg != X86_ECX)
			x86_pop_reg (code, X86_ECX);
		if (tree->dreg != X86_EAX && sreg != X86_EAX)
			x86_pop_reg (code, X86_EAX);
	}
	return code;
}
1606
1607
1608 static guint8*
1609 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1610 {
1611         CallInfo *cinfo;
1612         int quad;
1613
1614         /* Move return value to the target register */
1615         switch (ins->opcode) {
1616         case CEE_CALL:
1617         case OP_CALL_REG:
1618         case OP_CALL_MEMBASE:
1619                 if (ins->dreg != X86_EAX)
1620                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1621                 break;
1622         case OP_VCALL:
1623         case OP_VCALL_REG:
1624         case OP_VCALL_MEMBASE:
1625                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
1626                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1627                         /* Pop the destination address from the stack */
1628                         x86_pop_reg (code, X86_ECX);
1629                         
1630                         for (quad = 0; quad < 2; quad ++) {
1631                                 switch (cinfo->ret.pair_storage [quad]) {
1632                                 case ArgInIReg:
1633                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1634                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1635                                         break;
1636                                 case ArgNone:
1637                                         break;
1638                                 default:
1639                                         g_assert_not_reached ();
1640                                 }
1641                         }
1642                 }
1643                 g_free (cinfo);
1644         default:
1645                 break;
1646         }
1647
1648         return code;
1649 }
1650
/*
 * emit_tls_get:
 *
 *   Emit code which loads the thread-local value stored in TLS slot
 * TLS_OFFSET into the integer register DREG.  Returns the updated code
 * pointer.
 *   On win32 this open-codes TlsGetValue () by reading through the fs
 * segment; only the first 64 TLS slots are supported.  Elsewhere the value
 * is read directly from the gs segment at the given offset.
 */
static guint8*
emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
	/* 
	 * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
	 * Journal and/or a disassembly of the TlsGet () function.
	 */
	g_assert (tls_offset < 64);
	/* fs:[0x18] holds the TEB self pointer */
	x86_prefix (code, X86_FS_PREFIX);
	x86_mov_reg_mem (code, dreg, 0x18, 4);
	/* Dunno what this does but TlsGetValue () contains it */
	/* NOTE(review): mirrors TlsGetValue (); presumably clears SetLastError
	 * state in the TEB - confirm against a disassembly before changing */
	x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
	/* 3600 == 0xE10, presumably the TlsSlots array offset in the TEB */
	x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
#else
	x86_prefix (code, X86_GS_PREFIX);
	x86_mov_reg_mem (code, dreg, tls_offset, 4);
#endif
	return code;
}
1671
/*
 * REAL_PRINT_REG: debugging helper which emits code that printf ()s TEXT,
 * the register number REG and the register's runtime value.  EAX, EDX and
 * ECX (the caller-saved registers) are preserved around the call; the three
 * printf arguments are popped by the ADD $12, %esp after the call.
 * NOTE(review): embeds the absolute address of printf, so the emitted code
 * is not position independent; the macro is also multiple statements without
 * a do { } while (0) wrapper, so it is unsafe inside an unbraced if/else.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
1686
1687 /* benchmark and set based on cpu */
1688 #define LOOP_ALIGNMENT 8
1689 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
1690
1691 void
1692 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1693 {
1694         MonoInst *ins;
1695         MonoCallInst *call;
1696         guint offset;
1697         guint8 *code = cfg->native_code + cfg->code_len;
1698         MonoInst *last_ins = NULL;
1699         guint last_offset = 0;
1700         int max_len, cpos;
1701
1702         if (cfg->opt & MONO_OPT_PEEPHOLE)
1703                 peephole_pass (cfg, bb);
1704
1705         if (cfg->opt & MONO_OPT_LOOP) {
1706                 int pad, align = LOOP_ALIGNMENT;
1707                 /* set alignment depending on cpu */
1708                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
1709                         pad = align - pad;
1710                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
1711                         x86_padding (code, pad);
1712                         cfg->code_len += pad;
1713                         bb->native_offset = cfg->code_len;
1714                 }
1715         }
1716
1717         if (cfg->verbose_level > 2)
1718                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
1719
1720         cpos = bb->max_offset;
1721
1722         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
1723                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
1724                 g_assert (!cfg->compile_aot);
1725                 cpos += 6;
1726
1727                 cov->data [bb->dfn].cil_code = bb->cil_code;
1728                 /* this is not thread save, but good enough */
1729                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
1730         }
1731
1732         offset = code - cfg->native_code;
1733
1734         ins = bb->code;
1735         while (ins) {
1736                 offset = code - cfg->native_code;
1737
1738                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
1739
1740                 if (offset > (cfg->code_size - max_len - 16)) {
1741                         cfg->code_size *= 2;
1742                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
1743                         code = cfg->native_code + offset;
1744                         mono_jit_stats.code_reallocs++;
1745                 }
1746
1747                 mono_debug_record_line_number (cfg, ins, offset);
1748
1749                 switch (ins->opcode) {
1750                 case OP_BIGMUL:
1751                         x86_mul_reg (code, ins->sreg2, TRUE);
1752                         break;
1753                 case OP_BIGMUL_UN:
1754                         x86_mul_reg (code, ins->sreg2, FALSE);
1755                         break;
1756                 case OP_X86_SETEQ_MEMBASE:
1757                 case OP_X86_SETNE_MEMBASE:
1758                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
1759                                          ins->inst_basereg, ins->inst_offset, TRUE);
1760                         break;
1761                 case OP_STOREI1_MEMBASE_IMM:
1762                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
1763                         break;
1764                 case OP_STOREI2_MEMBASE_IMM:
1765                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
1766                         break;
1767                 case OP_STORE_MEMBASE_IMM:
1768                 case OP_STOREI4_MEMBASE_IMM:
1769                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
1770                         break;
1771                 case OP_STOREI1_MEMBASE_REG:
1772                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
1773                         break;
1774                 case OP_STOREI2_MEMBASE_REG:
1775                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
1776                         break;
1777                 case OP_STORE_MEMBASE_REG:
1778                 case OP_STOREI4_MEMBASE_REG:
1779                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
1780                         break;
1781                 case CEE_LDIND_I:
1782                 case CEE_LDIND_I4:
1783                 case CEE_LDIND_U4:
1784                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
1785                         break;
1786                 case OP_LOADU4_MEM:
1787                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
1788                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
1789                         break;
1790                 case OP_LOAD_MEMBASE:
1791                 case OP_LOADI4_MEMBASE:
1792                 case OP_LOADU4_MEMBASE:
1793                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
1794                         break;
1795                 case OP_LOADU1_MEMBASE:
1796                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
1797                         break;
1798                 case OP_LOADI1_MEMBASE:
1799                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
1800                         break;
1801                 case OP_LOADU2_MEMBASE:
1802                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
1803                         break;
1804                 case OP_LOADI2_MEMBASE:
1805                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
1806                         break;
1807                 case CEE_CONV_I1:
1808                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
1809                         break;
1810                 case CEE_CONV_I2:
1811                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
1812                         break;
1813                 case CEE_CONV_U1:
1814                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
1815                         break;
1816                 case CEE_CONV_U2:
1817                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
1818                         break;
1819                 case OP_COMPARE:
1820                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
1821                         break;
1822                 case OP_COMPARE_IMM:
1823                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
1824                         break;
1825                 case OP_X86_COMPARE_MEMBASE_REG:
1826                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
1827                         break;
1828                 case OP_X86_COMPARE_MEMBASE_IMM:
1829                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1830                         break;
1831                 case OP_X86_COMPARE_MEMBASE8_IMM:
1832                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1833                         break;
1834                 case OP_X86_COMPARE_REG_MEMBASE:
1835                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
1836                         break;
1837                 case OP_X86_COMPARE_MEM_IMM:
1838                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
1839                         break;
1840                 case OP_X86_TEST_NULL:
1841                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
1842                         break;
1843                 case OP_X86_ADD_MEMBASE_IMM:
1844                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1845                         break;
1846                 case OP_X86_ADD_MEMBASE:
1847                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
1848                         break;
1849                 case OP_X86_SUB_MEMBASE_IMM:
1850                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1851                         break;
1852                 case OP_X86_SUB_MEMBASE:
1853                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
1854                         break;
1855                 case OP_X86_INC_MEMBASE:
1856                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
1857                         break;
1858                 case OP_X86_INC_REG:
1859                         x86_inc_reg (code, ins->dreg);
1860                         break;
1861                 case OP_X86_DEC_MEMBASE:
1862                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
1863                         break;
1864                 case OP_X86_DEC_REG:
1865                         x86_dec_reg (code, ins->dreg);
1866                         break;
1867                 case OP_X86_MUL_MEMBASE:
1868                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
1869                         break;
1870                 case CEE_BREAK:
1871                         x86_breakpoint (code);
1872                         break;
1873                 case OP_ADDCC:
1874                 case CEE_ADD:
1875                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
1876                         break;
1877                 case OP_ADC:
1878                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
1879                         break;
1880                 case OP_ADDCC_IMM:
1881                 case OP_ADD_IMM:
1882                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
1883                         break;
1884                 case OP_ADC_IMM:
1885                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
1886                         break;
1887                 case OP_SUBCC:
1888                 case CEE_SUB:
1889                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
1890                         break;
1891                 case OP_SBB:
1892                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
1893                         break;
1894                 case OP_SUBCC_IMM:
1895                 case OP_SUB_IMM:
1896                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
1897                         break;
1898                 case OP_SBB_IMM:
1899                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
1900                         break;
1901                 case CEE_AND:
1902                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
1903                         break;
1904                 case OP_AND_IMM:
1905                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
1906                         break;
1907                 case CEE_DIV:
1908                         x86_cdq (code);
1909                         x86_div_reg (code, ins->sreg2, TRUE);
1910                         break;
1911                 case CEE_DIV_UN:
1912                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1913                         x86_div_reg (code, ins->sreg2, FALSE);
1914                         break;
1915                 case OP_DIV_IMM:
1916                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1917                         x86_cdq (code);
1918                         x86_div_reg (code, ins->sreg2, TRUE);
1919                         break;
1920                 case CEE_REM:
1921                         x86_cdq (code);
1922                         x86_div_reg (code, ins->sreg2, TRUE);
1923                         break;
1924                 case CEE_REM_UN:
1925                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1926                         x86_div_reg (code, ins->sreg2, FALSE);
1927                         break;
1928                 case OP_REM_IMM:
1929                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1930                         x86_cdq (code);
1931                         x86_div_reg (code, ins->sreg2, TRUE);
1932                         break;
1933                 case CEE_OR:
1934                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
1935                         break;
1936                 case OP_OR_IMM:
1937                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
1938                         break;
1939                 case CEE_XOR:
1940                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
1941                         break;
1942                 case OP_XOR_IMM:
1943                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
1944                         break;
1945                 case CEE_SHL:
1946                         g_assert (ins->sreg2 == X86_ECX);
1947                         x86_shift_reg (code, X86_SHL, ins->dreg);
1948                         break;
1949                 case CEE_SHR:
1950                         g_assert (ins->sreg2 == X86_ECX);
1951                         x86_shift_reg (code, X86_SAR, ins->dreg);
1952                         break;
1953                 case OP_SHR_IMM:
1954                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
1955                         break;
1956                 case OP_SHR_UN_IMM:
1957                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
1958                         break;
1959                 case CEE_SHR_UN:
1960                         g_assert (ins->sreg2 == X86_ECX);
1961                         x86_shift_reg (code, X86_SHR, ins->dreg);
1962                         break;
1963                 case OP_SHL_IMM:
1964                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
1965                         break;
1966                 case OP_LSHL: {
1967                         guint8 *jump_to_end;
1968
1969                         /* handle shifts below 32 bits */
1970                         x86_shld_reg (code, ins->unused, ins->sreg1);
1971                         x86_shift_reg (code, X86_SHL, ins->sreg1);
1972
1973                         x86_test_reg_imm (code, X86_ECX, 32);
1974                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
1975
1976                         /* handle shift over 32 bit */
1977                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
1978                         x86_clear_reg (code, ins->sreg1);
1979                         
1980                         x86_patch (jump_to_end, code);
1981                         }
1982                         break;
1983                 case OP_LSHR: {
1984                         guint8 *jump_to_end;
1985
1986                         /* handle shifts below 32 bits */
1987                         x86_shrd_reg (code, ins->sreg1, ins->unused);
1988                         x86_shift_reg (code, X86_SAR, ins->unused);
1989
1990                         x86_test_reg_imm (code, X86_ECX, 32);
1991                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
1992
1993                         /* handle shifts over 31 bits */
1994                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
1995                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
1996                         
1997                         x86_patch (jump_to_end, code);
1998                         }
1999                         break;
2000                 case OP_LSHR_UN: {
2001                         guint8 *jump_to_end;
2002
2003                         /* handle shifts below 32 bits */
2004                         x86_shrd_reg (code, ins->sreg1, ins->unused);
2005                         x86_shift_reg (code, X86_SHR, ins->unused);
2006
2007                         x86_test_reg_imm (code, X86_ECX, 32);
2008                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2009
2010                         /* handle shifts over 31 bits */
2011                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2012                         x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
2013                         
2014                         x86_patch (jump_to_end, code);
2015                         }
2016                         break;
2017                 case OP_LSHL_IMM:
2018                         if (ins->inst_imm >= 32) {
2019                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
2020                                 x86_clear_reg (code, ins->sreg1);
2021                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
2022                         } else {
2023                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
2024                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2025                         }
2026                         break;
2027                 case OP_LSHR_IMM:
2028                         if (ins->inst_imm >= 32) {
2029                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
2030                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
2031                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2032                         } else {
2033                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2034                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
2035                         }
2036                         break;
2037                 case OP_LSHR_UN_IMM:
2038                         if (ins->inst_imm >= 32) {
2039                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2040                                 x86_clear_reg (code, ins->unused);
2041                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2042                         } else {
2043                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2044                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
2045                         }
2046                         break;
2047                 case CEE_NOT:
2048                         x86_not_reg (code, ins->sreg1);
2049                         break;
2050                 case CEE_NEG:
2051                         x86_neg_reg (code, ins->sreg1);
2052                         break;
2053                 case OP_SEXT_I1:
2054                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2055                         break;
2056                 case OP_SEXT_I2:
2057                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2058                         break;
2059                 case CEE_MUL:
2060                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2061                         break;
2062                 case OP_MUL_IMM:
2063                         x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2064                         break;
2065                 case CEE_MUL_OVF:
2066                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2067                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2068                         break;
2069                 case CEE_MUL_OVF_UN: {
2070                         /* the mul operation and the exception check should most likely be split */
2071                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2072                         /*g_assert (ins->sreg2 == X86_EAX);
2073                         g_assert (ins->dreg == X86_EAX);*/
2074                         if (ins->sreg2 == X86_EAX) {
2075                                 non_eax_reg = ins->sreg1;
2076                         } else if (ins->sreg1 == X86_EAX) {
2077                                 non_eax_reg = ins->sreg2;
2078                         } else {
2079                                 /* no need to save since we're going to store to it anyway */
2080                                 if (ins->dreg != X86_EAX) {
2081                                         saved_eax = TRUE;
2082                                         x86_push_reg (code, X86_EAX);
2083                                 }
2084                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2085                                 non_eax_reg = ins->sreg2;
2086                         }
2087                         if (ins->dreg == X86_EDX) {
2088                                 if (!saved_eax) {
2089                                         saved_eax = TRUE;
2090                                         x86_push_reg (code, X86_EAX);
2091                                 }
2092                         } else if (ins->dreg != X86_EAX) {
2093                                 saved_edx = TRUE;
2094                                 x86_push_reg (code, X86_EDX);
2095                         }
2096                         x86_mul_reg (code, non_eax_reg, FALSE);
2097                         /* save before the check since pop and mov don't change the flags */
2098                         if (ins->dreg != X86_EAX)
2099                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2100                         if (saved_edx)
2101                                 x86_pop_reg (code, X86_EDX);
2102                         if (saved_eax)
2103                                 x86_pop_reg (code, X86_EAX);
2104                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2105                         break;
2106                 }
2107                 case OP_ICONST:
2108                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2109                         break;
2110                 case OP_AOTCONST:
2111                         g_assert_not_reached ();
2112                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2113                         x86_mov_reg_imm (code, ins->dreg, 0);
2114                         break;
2115                 case OP_LOAD_GOTADDR:
2116                         x86_call_imm (code, 0);
2117                         /* 
2118                          * The patch needs to point to the pop, since the GOT offset needs 
2119                          * to be added to that address.
2120                          */
2121                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2122                         x86_pop_reg (code, ins->dreg);
2123                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2124                         break;
2125                 case OP_GOT_ENTRY:
2126                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2127                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2128                         break;
2129                 case OP_X86_PUSH_GOT_ENTRY:
2130                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2131                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2132                         break;
2133                 case CEE_CONV_I4:
2134                 case OP_MOVE:
2135                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2136                         break;
2137                 case CEE_CONV_U4:
2138                         g_assert_not_reached ();
2139                 case CEE_JMP: {
2140                         /*
2141                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2142                          * Keep in sync with the code in emit_epilog.
2143                          */
2144                         int pos = 0;
2145
2146                         /* FIXME: no tracing support... */
2147                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2148                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2149                         /* reset offset to make max_len work */
2150                         offset = code - cfg->native_code;
2151
2152                         g_assert (!cfg->method->save_lmf);
2153
2154                         if (cfg->used_int_regs & (1 << X86_EBX))
2155                                 pos -= 4;
2156                         if (cfg->used_int_regs & (1 << X86_EDI))
2157                                 pos -= 4;
2158                         if (cfg->used_int_regs & (1 << X86_ESI))
2159                                 pos -= 4;
2160                         if (pos)
2161                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2162         
2163                         if (cfg->used_int_regs & (1 << X86_ESI))
2164                                 x86_pop_reg (code, X86_ESI);
2165                         if (cfg->used_int_regs & (1 << X86_EDI))
2166                                 x86_pop_reg (code, X86_EDI);
2167                         if (cfg->used_int_regs & (1 << X86_EBX))
2168                                 x86_pop_reg (code, X86_EBX);
2169         
2170                         /* restore ESP/EBP */
2171                         x86_leave (code);
2172                         offset = code - cfg->native_code;
2173                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2174                         x86_jump32 (code, 0);
2175                         break;
2176                 }
2177                 case OP_CHECK_THIS:
2178                         /* ensure ins->sreg1 is not NULL
2179                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2180                          * cmp DWORD PTR [eax], 0
2181                          */
2182                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2183                         break;
2184                 case OP_ARGLIST: {
2185                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2186                         x86_push_reg (code, hreg);
2187                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2188                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2189                         x86_pop_reg (code, hreg);
2190                         break;
2191                 }
2192                 case OP_FCALL:
2193                 case OP_LCALL:
2194                 case OP_VCALL:
2195                 case OP_VOIDCALL:
2196                 case CEE_CALL:
2197                         call = (MonoCallInst*)ins;
2198                         if (ins->flags & MONO_INST_HAS_METHOD)
2199                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2200                         else
2201                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2202                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2203                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2204                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2205                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
2206                                  * smart enough to do that optimization yet
2207                                  *
2208                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2209                          * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small win
2210                                  * (most likely from locality benefits). People with other processors should
2211                                  * check on theirs to see what happens.
2212                                  */
2213                                 if (call->stack_usage == 4) {
2214                                         /* we want to use registers that won't get used soon, so use
2215                                          * ecx, as eax will get allocated first. edx is used by long calls,
2216                                          * so we can't use that.
2217                                          */
2218                                         
2219                                         x86_pop_reg (code, X86_ECX);
2220                                 } else {
2221                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2222                                 }
2223                         }
2224                         code = emit_move_return_value (cfg, ins, code);
2225                         break;
2226                 case OP_FCALL_REG:
2227                 case OP_LCALL_REG:
2228                 case OP_VCALL_REG:
2229                 case OP_VOIDCALL_REG:
2230                 case OP_CALL_REG:
2231                         call = (MonoCallInst*)ins;
2232                         x86_call_reg (code, ins->sreg1);
2233                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2234                                 if (call->stack_usage == 4)
2235                                         x86_pop_reg (code, X86_ECX);
2236                                 else
2237                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2238                         }
2239                         code = emit_move_return_value (cfg, ins, code);
2240                         break;
2241                 case OP_FCALL_MEMBASE:
2242                 case OP_LCALL_MEMBASE:
2243                 case OP_VCALL_MEMBASE:
2244                 case OP_VOIDCALL_MEMBASE:
2245                 case OP_CALL_MEMBASE:
2246                         call = (MonoCallInst*)ins;
2247                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2248                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2249                                 if (call->stack_usage == 4)
2250                                         x86_pop_reg (code, X86_ECX);
2251                                 else
2252                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2253                         }
2254                         code = emit_move_return_value (cfg, ins, code);
2255                         break;
2256                 case OP_OUTARG:
2257                 case OP_X86_PUSH:
2258                         x86_push_reg (code, ins->sreg1);
2259                         break;
2260                 case OP_X86_PUSH_IMM:
2261                         x86_push_imm (code, ins->inst_imm);
2262                         break;
2263                 case OP_X86_PUSH_MEMBASE:
2264                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2265                         break;
2266                 case OP_X86_PUSH_OBJ: 
2267                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2268                         x86_push_reg (code, X86_EDI);
2269                         x86_push_reg (code, X86_ESI);
2270                         x86_push_reg (code, X86_ECX);
2271                         if (ins->inst_offset)
2272                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2273                         else
2274                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2275                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2276                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2277                         x86_cld (code);
2278                         x86_prefix (code, X86_REP_PREFIX);
2279                         x86_movsd (code);
2280                         x86_pop_reg (code, X86_ECX);
2281                         x86_pop_reg (code, X86_ESI);
2282                         x86_pop_reg (code, X86_EDI);
2283                         break;
2284                 case OP_X86_LEA:
2285                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
2286                         break;
2287                 case OP_X86_LEA_MEMBASE:
2288                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2289                         break;
2290                 case OP_X86_XCHG:
2291                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2292                         break;
2293                 case OP_LOCALLOC:
2294                         /* keep alignment */
2295                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2296                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2297                         code = mono_emit_stack_alloc (code, ins);
2298                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2299                         break;
2300                 case CEE_RET:
2301                         x86_ret (code);
2302                         break;
2303                 case CEE_THROW: {
2304                         x86_push_reg (code, ins->sreg1);
2305                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2306                                                           (gpointer)"mono_arch_throw_exception");
2307                         break;
2308                 }
2309                 case OP_RETHROW: {
2310                         x86_push_reg (code, ins->sreg1);
2311                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2312                                                           (gpointer)"mono_arch_rethrow_exception");
2313                         break;
2314                 }
2315                 case OP_CALL_HANDLER: 
2316                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2317                         x86_call_imm (code, 0);
2318                         break;
2319                 case OP_LABEL:
2320                         ins->inst_c0 = code - cfg->native_code;
2321                         break;
2322                 case CEE_BR:
2323                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2324                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2325                         //break;
2326                         if (ins->flags & MONO_INST_BRLABEL) {
2327                                 if (ins->inst_i0->inst_c0) {
2328                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2329                                 } else {
2330                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2331                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2332                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2333                                                 x86_jump8 (code, 0);
2334                                         else 
2335                                                 x86_jump32 (code, 0);
2336                                 }
2337                         } else {
2338                                 if (ins->inst_target_bb->native_offset) {
2339                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2340                                 } else {
2341                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2342                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2343                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2344                                                 x86_jump8 (code, 0);
2345                                         else 
2346                                                 x86_jump32 (code, 0);
2347                                 } 
2348                         }
2349                         break;
2350                 case OP_BR_REG:
2351                         x86_jump_reg (code, ins->sreg1);
2352                         break;
2353                 case OP_CEQ:
2354                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2355                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2356                         break;
2357                 case OP_CLT:
2358                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2359                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2360                         break;
2361                 case OP_CLT_UN:
2362                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2363                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2364                         break;
2365                 case OP_CGT:
2366                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2367                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2368                         break;
2369                 case OP_CGT_UN:
2370                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2371                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2372                         break;
2373                 case OP_CNE:
2374                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
2375                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2376                         break;
2377                 case OP_COND_EXC_EQ:
2378                 case OP_COND_EXC_NE_UN:
2379                 case OP_COND_EXC_LT:
2380                 case OP_COND_EXC_LT_UN:
2381                 case OP_COND_EXC_GT:
2382                 case OP_COND_EXC_GT_UN:
2383                 case OP_COND_EXC_GE:
2384                 case OP_COND_EXC_GE_UN:
2385                 case OP_COND_EXC_LE:
2386                 case OP_COND_EXC_LE_UN:
2387                 case OP_COND_EXC_OV:
2388                 case OP_COND_EXC_NO:
2389                 case OP_COND_EXC_C:
2390                 case OP_COND_EXC_NC:
2391                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
2392                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2393                         break;
2394                 case CEE_BEQ:
2395                 case CEE_BNE_UN:
2396                 case CEE_BLT:
2397                 case CEE_BLT_UN:
2398                 case CEE_BGT:
2399                 case CEE_BGT_UN:
2400                 case CEE_BGE:
2401                 case CEE_BGE_UN:
2402                 case CEE_BLE:
2403                 case CEE_BLE_UN:
2404                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2405                         break;
2406
2407                 /* floating point opcodes */
2408                 case OP_R8CONST: {
2409                         double d = *(double *)ins->inst_p0;
2410
2411                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2412                                 x86_fldz (code);
2413                         } else if (d == 1.0) {
2414                                 x86_fld1 (code);
2415                         } else {
2416                                 if (cfg->compile_aot) {
2417                                         guint32 *val = (guint32*)&d;
2418                                         x86_push_imm (code, val [1]);
2419                                         x86_push_imm (code, val [0]);
2420                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2421                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2422                                 }
2423                                 else {
2424                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2425                                         x86_fld (code, NULL, TRUE);
2426                                 }
2427                         }
2428                         break;
2429                 }
2430                 case OP_R4CONST: {
2431                         float f = *(float *)ins->inst_p0;
2432
2433                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2434                                 x86_fldz (code);
2435                         } else if (f == 1.0) {
2436                                 x86_fld1 (code);
2437                         } else {
2438                                 if (cfg->compile_aot) {
2439                                         guint32 val = *(guint32*)&f;
2440                                         x86_push_imm (code, val);
2441                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2442                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2443                                 }
2444                                 else {
2445                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2446                                         x86_fld (code, NULL, FALSE);
2447                                 }
2448                         }
2449                         break;
2450                 }
2451                 case OP_STORER8_MEMBASE_REG:
2452                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2453                         break;
2454                 case OP_LOADR8_SPILL_MEMBASE:
2455                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2456                         x86_fxch (code, 1);
2457                         break;
2458                 case OP_LOADR8_MEMBASE:
2459                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2460                         break;
2461                 case OP_STORER4_MEMBASE_REG:
2462                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2463                         break;
2464                 case OP_LOADR4_MEMBASE:
2465                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2466                         break;
2467                 case CEE_CONV_R4: /* FIXME: change precision */
2468                 case CEE_CONV_R8:
2469                         x86_push_reg (code, ins->sreg1);
2470                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2471                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2472                         break;
2473                 case OP_X86_FP_LOAD_I8:
2474                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2475                         break;
2476                 case OP_X86_FP_LOAD_I4:
2477                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2478                         break;
2479                 case OP_FCONV_TO_I1:
2480                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2481                         break;
2482                 case OP_FCONV_TO_U1:
2483                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2484                         break;
2485                 case OP_FCONV_TO_I2:
2486                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2487                         break;
2488                 case OP_FCONV_TO_U2:
2489                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2490                         break;
2491                 case OP_FCONV_TO_I4:
2492                 case OP_FCONV_TO_I:
2493                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2494                         break;
2495                 case OP_FCONV_TO_I8:
2496                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2497                         x86_fnstcw_membase(code, X86_ESP, 0);
2498                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2499                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2500                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2501                         x86_fldcw_membase (code, X86_ESP, 2);
2502                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2503                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2504                         x86_pop_reg (code, ins->dreg);
2505                         x86_pop_reg (code, ins->unused);
2506                         x86_fldcw_membase (code, X86_ESP, 0);
2507                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2508                         break;
2509                 case OP_LCONV_TO_R_UN: { 
2510                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2511                         guint8 *br;
2512
2513                         /* load 64bit integer to FP stack */
2514                         x86_push_imm (code, 0);
2515                         x86_push_reg (code, ins->sreg2);
2516                         x86_push_reg (code, ins->sreg1);
2517                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2518                         /* store as 80bit FP value */
2519                         x86_fst80_membase (code, X86_ESP, 0);
2520                         
2521                         /* test if lreg is negative */
2522                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2523                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2524         
2525                         /* add correction constant mn */
2526                         x86_fld80_mem (code, mn);
2527                         x86_fld80_membase (code, X86_ESP, 0);
2528                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2529                         x86_fst80_membase (code, X86_ESP, 0);
2530
2531                         x86_patch (br, code);
2532
2533                         x86_fld80_membase (code, X86_ESP, 0);
2534                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2535
2536                         break;
2537                 }
2538                 case OP_LCONV_TO_OVF_I: {
2539                         guint8 *br [3], *label [1];
2540
2541                         /* 
2542                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2543                          */
2544                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2545
2546                         /* If the low word top bit is set, see if we are negative */
2547                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2548                         /* We are not negative (no top bit set); check that our top word is zero */
2549                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2550                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2551                         label [0] = code;
2552
2553                         /* throw exception */
2554                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2555                         x86_jump32 (code, 0);
2556         
2557                         x86_patch (br [0], code);
2558                         /* our top bit is set, check that top word is 0xffffffff */
2559                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2560                 
2561                         x86_patch (br [1], code);
2562                         /* nope, emit exception */
2563                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2564                         x86_patch (br [2], label [0]);
2565
2566                         if (ins->dreg != ins->sreg1)
2567                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2568                         break;
2569                 }
2570                 case OP_FADD:
2571                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2572                         break;
2573                 case OP_FSUB:
2574                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2575                         break;          
2576                 case OP_FMUL:
2577                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2578                         break;          
2579                 case OP_FDIV:
2580                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2581                         break;          
2582                 case OP_FNEG:
2583                         x86_fchs (code);
2584                         break;          
2585                 case OP_SIN:
2586                         x86_fsin (code);
2587                         x86_fldz (code);
2588                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2589                         break;          
2590                 case OP_COS:
2591                         x86_fcos (code);
2592                         x86_fldz (code);
2593                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2594                         break;          
2595                 case OP_ABS:
2596                         x86_fabs (code);
2597                         break;          
2598                 case OP_TAN: {
2599                         /* 
2600                          * it really doesn't make sense to inline all this code,
2601                          * it's here just to show that things may not be as simple 
2602                          * as they appear.
2603                          */
2604                         guchar *check_pos, *end_tan, *pop_jump;
2605                         x86_push_reg (code, X86_EAX);
2606                         x86_fptan (code);
2607                         x86_fnstsw (code);
2608                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2609                         check_pos = code;
2610                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2611                         x86_fstp (code, 0); /* pop the 1.0 */
2612                         end_tan = code;
2613                         x86_jump8 (code, 0);
2614                         x86_fldpi (code);
2615                         x86_fp_op (code, X86_FADD, 0);
2616                         x86_fxch (code, 1);
2617                         x86_fprem1 (code);
2618                         x86_fstsw (code);
2619                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2620                         pop_jump = code;
2621                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2622                         x86_fstp (code, 1);
2623                         x86_fptan (code);
2624                         x86_patch (pop_jump, code);
2625                         x86_fstp (code, 0); /* pop the 1.0 */
2626                         x86_patch (check_pos, code);
2627                         x86_patch (end_tan, code);
2628                         x86_fldz (code);
2629                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2630                         x86_pop_reg (code, X86_EAX);
2631                         break;
2632                 }
2633                 case OP_ATAN:
2634                         x86_fld1 (code);
2635                         x86_fpatan (code);
2636                         x86_fldz (code);
2637                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2638                         break;          
2639                 case OP_SQRT:
2640                         x86_fsqrt (code);
2641                         break;          
2642                 case OP_X86_FPOP:
2643                         x86_fstp (code, 0);
2644                         break;          
2645                 case OP_FREM: {
2646                         guint8 *l1, *l2;
2647
2648                         x86_push_reg (code, X86_EAX);
2649                         /* we need to exchange ST(0) with ST(1) */
2650                         x86_fxch (code, 1);
2651
2652                         /* this requires a loop, because fprem sometimes
2653                          * returns a partial remainder */
2654                         l1 = code;
2655                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
2656                         /* x86_fprem1 (code); */
2657                         x86_fprem (code);
2658                         x86_fnstsw (code);
2659                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
2660                         l2 = code + 2;
2661                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2662
2663                         /* pop result */
2664                         x86_fstp (code, 1);
2665
2666                         x86_pop_reg (code, X86_EAX);
2667                         break;
2668                 }
2669                 case OP_FCOMPARE:
2670                         if (cfg->opt & MONO_OPT_FCMOV) {
2671                                 x86_fcomip (code, 1);
2672                                 x86_fstp (code, 0);
2673                                 break;
2674                         }
2675                         /* this overwrites EAX */
2676                         EMIT_FPCOMPARE(code);
2677                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2678                         break;
2679                 case OP_FCEQ:
2680                         if (cfg->opt & MONO_OPT_FCMOV) {
2681                                 /* zeroing the register at the start results in 
2682                                  * shorter and faster code (we can also remove the widening op)
2683                                  */
2684                                 guchar *unordered_check;
2685                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2686                                 x86_fcomip (code, 1);
2687                                 x86_fstp (code, 0);
2688                                 unordered_check = code;
2689                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2690                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2691                                 x86_patch (unordered_check, code);
2692                                 break;
2693                         }
2694                         if (ins->dreg != X86_EAX) 
2695                                 x86_push_reg (code, X86_EAX);
2696
2697                         EMIT_FPCOMPARE(code);
2698                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2699                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2700                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2701                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2702
2703                         if (ins->dreg != X86_EAX) 
2704                                 x86_pop_reg (code, X86_EAX);
2705                         break;
2706                 case OP_FCLT:
2707                 case OP_FCLT_UN:
2708                         if (cfg->opt & MONO_OPT_FCMOV) {
2709                                 /* zeroing the register at the start results in 
2710                                  * shorter and faster code (we can also remove the widening op)
2711                                  */
2712                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2713                                 x86_fcomip (code, 1);
2714                                 x86_fstp (code, 0);
2715                                 if (ins->opcode == OP_FCLT_UN) {
2716                                         guchar *unordered_check = code;
2717                                         guchar *jump_to_end;
2718                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2719                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2720                                         jump_to_end = code;
2721                                         x86_jump8 (code, 0);
2722                                         x86_patch (unordered_check, code);
2723                                         x86_inc_reg (code, ins->dreg);
2724                                         x86_patch (jump_to_end, code);
2725                                 } else {
2726                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2727                                 }
2728                                 break;
2729                         }
2730                         if (ins->dreg != X86_EAX) 
2731                                 x86_push_reg (code, X86_EAX);
2732
2733                         EMIT_FPCOMPARE(code);
2734                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2735                         if (ins->opcode == OP_FCLT_UN) {
2736                                 guchar *is_not_zero_check, *end_jump;
2737                                 is_not_zero_check = code;
2738                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2739                                 end_jump = code;
2740                                 x86_jump8 (code, 0);
2741                                 x86_patch (is_not_zero_check, code);
2742                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2743
2744                                 x86_patch (end_jump, code);
2745                         }
2746                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2747                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2748
2749                         if (ins->dreg != X86_EAX) 
2750                                 x86_pop_reg (code, X86_EAX);
2751                         break;
2752                 case OP_FCGT:
2753                 case OP_FCGT_UN:
2754                         if (cfg->opt & MONO_OPT_FCMOV) {
2755                                 /* zeroing the register at the start results in 
2756                                  * shorter and faster code (we can also remove the widening op)
2757                                  */
2758                                 guchar *unordered_check;
2759                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2760                                 x86_fcomip (code, 1);
2761                                 x86_fstp (code, 0);
2762                                 if (ins->opcode == OP_FCGT) {
2763                                         unordered_check = code;
2764                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2765                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2766                                         x86_patch (unordered_check, code);
2767                                 } else {
2768                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2769                                 }
2770                                 break;
2771                         }
2772                         if (ins->dreg != X86_EAX) 
2773                                 x86_push_reg (code, X86_EAX);
2774
2775                         EMIT_FPCOMPARE(code);
2776                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2777                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2778                         if (ins->opcode == OP_FCGT_UN) {
2779                                 guchar *is_not_zero_check, *end_jump;
2780                                 is_not_zero_check = code;
2781                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2782                                 end_jump = code;
2783                                 x86_jump8 (code, 0);
2784                                 x86_patch (is_not_zero_check, code);
2785                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2786         
2787                                 x86_patch (end_jump, code);
2788                         }
2789                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2790                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2791
2792                         if (ins->dreg != X86_EAX) 
2793                                 x86_pop_reg (code, X86_EAX);
2794                         break;
2795                 case OP_FBEQ:
2796                         if (cfg->opt & MONO_OPT_FCMOV) {
2797                                 guchar *jump = code;
2798                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
2799                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2800                                 x86_patch (jump, code);
2801                                 break;
2802                         }
2803                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2804                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2805                         break;
2806                 case OP_FBNE_UN:
2807                         /* Branch if C013 != 100 */
2808                         if (cfg->opt & MONO_OPT_FCMOV) {
2809                                 /* branch if !ZF or (PF|CF) */
2810                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2811                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2812                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
2813                                 break;
2814                         }
2815                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2816                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2817                         break;
2818                 case OP_FBLT:
2819                         if (cfg->opt & MONO_OPT_FCMOV) {
2820                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2821                                 break;
2822                         }
2823                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2824                         break;
2825                 case OP_FBLT_UN:
2826                         if (cfg->opt & MONO_OPT_FCMOV) {
2827                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2828                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2829                                 break;
2830                         }
2831                         if (ins->opcode == OP_FBLT_UN) {
2832                                 guchar *is_not_zero_check, *end_jump;
2833                                 is_not_zero_check = code;
2834                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2835                                 end_jump = code;
2836                                 x86_jump8 (code, 0);
2837                                 x86_patch (is_not_zero_check, code);
2838                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2839
2840                                 x86_patch (end_jump, code);
2841                         }
2842                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2843                         break;
2844                 case OP_FBGT:
2845                 case OP_FBGT_UN:
2846                         if (cfg->opt & MONO_OPT_FCMOV) {
2847                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
2848                                 break;
2849                         }
2850                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2851                         if (ins->opcode == OP_FBGT_UN) {
2852                                 guchar *is_not_zero_check, *end_jump;
2853                                 is_not_zero_check = code;
2854                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2855                                 end_jump = code;
2856                                 x86_jump8 (code, 0);
2857                                 x86_patch (is_not_zero_check, code);
2858                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2859
2860                                 x86_patch (end_jump, code);
2861                         }
2862                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2863                         break;
2864                 case OP_FBGE:
2865                         /* Branch if C013 == 100 or 001 */
2866                         if (cfg->opt & MONO_OPT_FCMOV) {
2867                                 guchar *br1;
2868
2869                                 /* skip branch if C1=1 */
2870                                 br1 = code;
2871                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2872                                 /* branch if (C0 | C3) = 1 */
2873                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
2874                                 x86_patch (br1, code);
2875                                 break;
2876                         }
2877                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2878                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2879                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2880                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2881                         break;
2882                 case OP_FBGE_UN:
2883                         /* Branch if C013 == 000 */
2884                         if (cfg->opt & MONO_OPT_FCMOV) {
2885                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
2886                                 break;
2887                         }
2888                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2889                         break;
2890                 case OP_FBLE:
2891                         /* Branch if C013=000 or 100 */
2892                         if (cfg->opt & MONO_OPT_FCMOV) {
2893                                 guchar *br1;
2894
2895                                 /* skip branch if C1=1 */
2896                                 br1 = code;
2897                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2898                                 /* branch if C0=0 */
2899                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
2900                                 x86_patch (br1, code);
2901                                 break;
2902                         }
2903                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
2904                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
2905                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2906                         break;
2907                 case OP_FBLE_UN:
2908                         /* Branch if C013 != 001 */
2909                         if (cfg->opt & MONO_OPT_FCMOV) {
2910                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2911                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
2912                                 break;
2913                         }
2914                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2915                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2916                         break;
2917                 case CEE_CKFINITE: {
2918                         x86_push_reg (code, X86_EAX);
2919                         x86_fxam (code);
2920                         x86_fnstsw (code);
2921                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
2922                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2923                         x86_pop_reg (code, X86_EAX);
2924                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
2925                         break;
2926                 }
2927                 case OP_TLS_GET: {
2928                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
2929                         break;
2930                 }
2931                 case OP_MEMORY_BARRIER: {
2932                         /* Not needed on x86 */
2933                         break;
2934                 }
2935                 case OP_ATOMIC_ADD_I4: {
2936                         int dreg = ins->dreg;
2937
2938                         if (dreg == ins->inst_basereg) {
2939                                 x86_push_reg (code, ins->sreg2);
2940                                 dreg = ins->sreg2;
2941                         } 
2942                         
2943                         if (dreg != ins->sreg2)
2944                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
2945
2946                         x86_prefix (code, X86_LOCK_PREFIX);
2947                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
2948
2949                         if (dreg != ins->dreg) {
2950                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
2951                                 x86_pop_reg (code, dreg);
2952                         }
2953
2954                         break;
2955                 }
2956                 case OP_ATOMIC_ADD_NEW_I4: {
2957                         int dreg = ins->dreg;
2958
2959                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
2960                         if (ins->sreg2 == dreg) {
2961                                 if (dreg == X86_EBX) {
2962                                         dreg = X86_EDI;
2963                                         if (ins->inst_basereg == X86_EDI)
2964                                                 dreg = X86_ESI;
2965                                 } else {
2966                                         dreg = X86_EBX;
2967                                         if (ins->inst_basereg == X86_EBX)
2968                                                 dreg = X86_EDI;
2969                                 }
2970                         } else if (ins->inst_basereg == dreg) {
2971                                 if (dreg == X86_EBX) {
2972                                         dreg = X86_EDI;
2973                                         if (ins->sreg2 == X86_EDI)
2974                                                 dreg = X86_ESI;
2975                                 } else {
2976                                         dreg = X86_EBX;
2977                                         if (ins->sreg2 == X86_EBX)
2978                                                 dreg = X86_EDI;
2979                                 }
2980                         }
2981
2982                         if (dreg != ins->dreg) {
2983                                 x86_push_reg (code, dreg);
2984                         }
2985
2986                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
2987                         x86_prefix (code, X86_LOCK_PREFIX);
2988                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
2989                         /* dreg contains the old value, add with sreg2 value */
2990                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
2991                         
2992                         if (ins->dreg != dreg) {
2993                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
2994                                 x86_pop_reg (code, dreg);
2995                         }
2996
2997                         break;
2998                 }
2999                 case OP_ATOMIC_EXCHANGE_I4: {
3000                         guchar *br[2];
3001                         int sreg2 = ins->sreg2;
3002                         int breg = ins->inst_basereg;
3003
3004                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3005                          * hack to overcome limits in x86 reg allocator 
3006                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3007                          */
3008                         if (ins->dreg != X86_EAX)
3009                                 x86_push_reg (code, X86_EAX);
3010                         
3011                         /* We need the EAX reg for the cmpxchg */
3012                         if (ins->sreg2 == X86_EAX) {
3013                                 x86_push_reg (code, X86_EDX);
3014                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3015                                 sreg2 = X86_EDX;
3016                         }
3017
3018                         if (breg == X86_EAX) {
3019                                 x86_push_reg (code, X86_ESI);
3020                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3021                                 breg = X86_ESI;
3022                         }
3023
3024                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3025
3026                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3027                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3028                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3029                         x86_patch (br [1], br [0]);
3030
3031                         if (breg != ins->inst_basereg)
3032                                 x86_pop_reg (code, X86_ESI);
3033
3034                         if (ins->dreg != X86_EAX) {
3035                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3036                                 x86_pop_reg (code, X86_EAX);
3037                         }
3038
3039                         if (ins->sreg2 != sreg2)
3040                                 x86_pop_reg (code, X86_EDX);
3041
3042                         break;
3043                 }
3044                 default:
3045                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3046                         g_assert_not_reached ();
3047                 }
3048
3049                 if ((code - cfg->native_code - offset) > max_len) {
3050                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3051                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3052                         g_assert_not_reached ();
3053                 }
3054                
3055                 cpos += max_len;
3056
3057                 last_ins = ins;
3058                 last_offset = offset;
3059                 
3060                 ins = ins->next;
3061         }
3062
3063         cfg->code_len = code - cfg->native_code;
3064 }
3065
void
mono_arch_register_lowlevel_calls (void)
{
	/* No arch-specific lowlevel icalls need to be registered on x86. */
}
3070
/*
 * mono_arch_patch_code:
 *
 *   Walk the jump-info list JI and back-patch the native CODE buffer of
 * METHOD: each entry's ip offset is resolved to an absolute address via
 * mono_resolve_patch_target () and written into the emitted instruction.
 * When RUN_CCTORS is FALSE the method is being compiled AOT; in that case
 * only basic-block/label branches are patched here and everything else is
 * deferred (it will be resolved through the GOT at load time).
 */
void
mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
{
	MonoJumpInfo *patch_info;
	gboolean compile_aot = !run_cctors;

	for (patch_info = ji; patch_info; patch_info = patch_info->next) {
		/* ip.i is an offset relative to the start of the method's code */
		unsigned char *ip = patch_info->ip.i + code;
		const unsigned char *target;

		target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);

		if (compile_aot) {
			switch (patch_info->type) {
			case MONO_PATCH_INFO_BB:
			case MONO_PATCH_INFO_LABEL:
				break;
			default:
				/* No need to patch these */
				continue;
			}
		}

		switch (patch_info->type) {
		case MONO_PATCH_INFO_IP:
			/* store the absolute address directly at the patch site */
			*((gconstpointer *)(ip)) = target;
			break;
		case MONO_PATCH_INFO_CLASS_INIT: {
			guint8 *code = ip;
			/* Might already been changed to a nop */
			/* re-emit the call opcode at ip, then fix up its displacement */
			x86_call_code (code, 0);
			x86_patch (ip, target);
			break;
		}
		case MONO_PATCH_INFO_ABS:
		case MONO_PATCH_INFO_METHOD:
		case MONO_PATCH_INFO_METHOD_JUMP:
		case MONO_PATCH_INFO_INTERNAL_METHOD:
		case MONO_PATCH_INFO_BB:
		case MONO_PATCH_INFO_LABEL:
			/* call/jump instructions: rewrite the relative displacement */
			x86_patch (ip, target);
			break;
		case MONO_PATCH_INFO_NONE:
			break;
		default: {
			/* data patch: the address goes at the instruction's
			 * immediate/displacement operand */
			guint32 offset = mono_arch_get_patch_offset (ip);
			*((gconstpointer *)(ip + offset)) = target;
			break;
		}
		}
	}
}
3123
/*
 * mono_arch_emit_prolog:
 *
 *   Emit the native prolog for CFG->method into a freshly allocated code
 * buffer: set up the EBP frame, attach the thread to the JIT for
 * native-to-managed wrappers, save either the LMF or the used callee-saved
 * registers, allocate the stack frame, precompute worst-case basic-block
 * offsets for short branches, and move register-allocated arguments from
 * their incoming stack slots into registers.  Returns the code pointer just
 * past the emitted prolog; cfg->code_len is updated accordingly.
 */
guint8 *
mono_arch_emit_prolog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoBasicBlock *bb;
	MonoMethodSignature *sig;
	MonoInst *inst;
	int alloc_size, pos, max_offset, i;
	guint8 *code;

	/* initial estimate: 4 bytes of native code per IL byte, 256 minimum */
	cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
	code = cfg->native_code = g_malloc (cfg->code_size);

	/* standard frame: push ebp; mov ebp, esp */
	x86_push_reg (code, X86_EBP);
	x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);

	alloc_size = cfg->stack_offset;
	pos = 0;

	if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
		/* Might need to attach the thread to the JIT */
		if (lmf_tls_offset != -1) {
			guint8 *buf;

			/* fast path: if the LMF TLS slot is non-NULL the thread
			 * is already attached and the call is skipped */
			code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
			x86_test_reg_reg (code, X86_EAX, X86_EAX);
			buf = code;
			x86_branch8 (code, X86_CC_NE, 0, 0);
			x86_push_imm (code, cfg->domain);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
			x86_patch (buf, code);
#ifdef PLATFORM_WIN32
			/* The TLS key actually contains a pointer to the MonoJitTlsData structure */
			/* FIXME: Add a separate key for LMF to avoid this */
			x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
#endif
		}
		else {
			g_assert (!cfg->compile_aot);
			x86_push_imm (code, cfg->domain);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
		}
	}

	if (method->save_lmf) {
		pos += sizeof (MonoLMF);

		/* save the current IP */
		/* the patch targets the push's 32-bit immediate, one byte
		 * after the opcode */
		mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
		x86_push_imm_template (code);

		/* save all caller saved regs */
		x86_push_reg (code, X86_EBP);
		x86_push_reg (code, X86_ESI);
		x86_push_reg (code, X86_EDI);
		x86_push_reg (code, X86_EBX);

		/* save method info */
		x86_push_imm (code, method);

		/* get the address of lmf for the current thread */
		/* 
		 * This is performance critical so we try to use some tricks to make
		 * it fast.
		 */
		if (lmf_tls_offset != -1) {
			/* Load lmf quicky using the GS register */
			code = emit_tls_get (code, X86_EAX, lmf_tls_offset);
#ifdef PLATFORM_WIN32
			/* The TLS key actually contains a pointer to the MonoJitTlsData structure */
			/* FIXME: Add a separate key for LMF to avoid this */
			x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
#endif
		}
		else {
			if (cfg->compile_aot) {
				/* The GOT var does not exist yet */
				/* call/pop to get the current IP, then add the
				 * (patched-in) GOT offset and call through the GOT slot */
				x86_call_imm (code, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
				x86_pop_reg (code, X86_EAX);
				x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
				x86_call_membase (code, X86_EAX, 0xf0f0f0f0);
			}
			else
				code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
		}

		/* push lmf */
		x86_push_reg (code, X86_EAX); 
		/* push *lmf (previous_lmf) */
		x86_push_membase (code, X86_EAX, 0);
		/* *(lmf) = ESP */
		x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
	} else {
		/* no LMF: just save the callee-saved registers we use */

		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_push_reg (code, X86_EBX);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_push_reg (code, X86_EDI);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_push_reg (code, X86_ESI);
			pos += 4;
		}
	}

	/* the pushes above already consumed part of the frame */
	alloc_size -= pos;

	if (alloc_size) {
		/* See mono_emit_stack_alloc */
#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
		/* touch each 4K page as we allocate so the guard page is hit
		 * in order */
		guint32 remaining_size = alloc_size;
		while (remaining_size >= 0x1000) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
			x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
			remaining_size -= 0x1000;
		}
		if (remaining_size)
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
#else
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
#endif
	}

	/* compute max_offset in order to use short forward jumps */
	/* (sums the worst-case native length of every instruction, per
	 * ins_spec[...][MONO_INST_LEN]) */
	max_offset = 0;
	if (cfg->opt & MONO_OPT_BRANCH) {
		for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
			MonoInst *ins = bb->code;
			bb->max_offset = max_offset;

			if (cfg->prof_options & MONO_PROFILE_COVERAGE)
				max_offset += 6;
			/* max alignment for loops */
			if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
				max_offset += LOOP_ALIGNMENT;

			while (ins) {
				if (ins->opcode == OP_LABEL)
					ins->inst_c1 = max_offset;
				
				max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
				ins = ins->next;
			}
		}
	}

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);

	/* load arguments allocated to register from the stack */
	sig = mono_method_signature (method);
	pos = 0;

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		inst = cfg->varinfo [pos];
		if (inst->opcode == OP_REGVAR) {
			x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
			if (cfg->verbose_level > 2)
				g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
		}
		pos++;
	}

	cfg->code_len = code - cfg->native_code;

	return code;
}
3300
/*
 * mono_arch_emit_epilog:
 *
 *   Emit the method epilogue: unlink the LMF (or restore the callee saved
 * registers), load valuetype return values into registers/the fpstack,
 * then emit leave + ret, popping the argument area for stdcall signatures.
 * Grows cfg->native_code as needed and updates cfg->code_len.
 */
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig = mono_method_signature (method);
	int quad, pos;
	guint32 stack_to_pop;
	guint8 *code;
	/* Worst case size estimate for the epilogue code emitted below */
	int max_epilog_size = 16;
	CallInfo *cinfo;
	
	if (cfg->method->save_lmf)
		max_epilog_size += 128;
	
	if (mono_jit_trace_calls != NULL)
		max_epilog_size += 50;

	/* Grow the code buffer until the epilogue is guaranteed to fit */
	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with CEE_JMP */
	pos = 0;
	
	if (method->save_lmf) {
		gint32 prev_lmf_reg;

		/*
		 * Find a spare register to hold the previous lmf pointer: EDX is
		 * free unless it carries the upper half of a 64 bit return value,
		 * in which case EDI is used (and marked used so it gets restored).
		 */
		switch (sig->ret->type) {
		case MONO_TYPE_I8:
		case MONO_TYPE_U8:
			prev_lmf_reg = X86_EDI;
			cfg->used_int_regs |= (1 << X86_EDI);
			break;
		default:
			prev_lmf_reg = X86_EDX;
			break;
		}

		/*
		 * NOTE(review): the EBP-relative offsets below (-32, -28, -20,
		 * -16, -12) must match the LMF/register save layout emitted by the
		 * prologue — keep them in sync.
		 */
		/* reg = previous_lmf */
		x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, -32, 4);

		/* ecx = lmf */
		x86_mov_reg_membase (code, X86_ECX, X86_EBP, -28, 4);

		/* *(lmf) = previous_lmf */
		x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);

		/* restore caller saved regs */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_mov_reg_membase (code, X86_EBX, X86_EBP, -20, 4);
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_mov_reg_membase (code, X86_EDI, X86_EBP, -16, 4);
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_mov_reg_membase (code, X86_ESI, X86_EBP, -12, 4);
		}

		/* EBP is restored by LEAVE */
	} else {
		/*
		 * No LMF: the callee saved registers were pushed by the prologue.
		 * Count how far below EBP they live, point ESP there, then pop
		 * them in reverse push order.
		 */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			pos -= 4;
		}

		if (pos)
			x86_lea_membase (code, X86_ESP, X86_EBP, pos);

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_pop_reg (code, X86_ESI);
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_pop_reg (code, X86_EDI);
		}
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_pop_reg (code, X86_EBX);
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = get_call_info (sig, FALSE);
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		/* Each quad (4 byte half) of the value can go to a different place */
		for (quad = 0; quad < 2; quad ++) {
			switch (cinfo->ret.pair_storage [quad]) {
			case ArgInIReg:
				x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
				break;
			case ArgOnFloatFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
				break;
			case ArgOnDoubleFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	x86_leave (code);

	/*
	 * Stdcall callees pop their own arguments; vtype-returning methods
	 * with a hidden return-address argument pop that one slot themselves.
	 */
	if (CALLCONV_IS_STDCALL (sig)) {
		MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

		stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
	} else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
		stack_to_pop = 4;
	else
		stack_to_pop = 0;

	if (stack_to_pop)
		x86_ret_imm (code, stack_to_pop);
	else
		x86_ret (code);

	g_free (cinfo);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3438
/*
 * mono_arch_emit_exceptions:
 *
 *   Emit the out of line code which throws corlib exceptions for
 * MONO_PATCH_INFO_EXC patches. Throw sequences for the same exception
 * class are shared (up to 16 distinct classes): later throw sites just
 * push their IP offset and jump into the first sequence.
 */
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	/* Caches for sharing throw sequences between sites of the same class */
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size;
	int exc_count = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			exc_count++;
	}

	/* 
	 * make sure we have enough space for exceptions
	 * 16 is the size of two push_imm instructions and a call
	 */
	if (cfg->compile_aot)
		code_size = exc_count * 32;
	else
		code_size = exc_count * 16;

	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			/* Redirect the branch at the throw site to the code emitted here */
			x86_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				/* Reuse it: push this site's IP offset and jump there */
				x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				guint32 got_reg = X86_EAX;
				guint32 size;

				/* Compute size of code following the push <OFFSET> */
				if (cfg->compile_aot) {
					size = 5 + 6;
					if (!cfg->got_var)
						size += 32;
					else if (cfg->got_var->opcode == OP_REGOFFSET)
						size += 6;
				}
				else
					size = 5 + 5;

				if ((code - cfg->native_code) - throw_ip < 126 - size) {
					/* Use the shorter form */
					buf = buf2 = code;
					x86_push_imm (code, 0);
				}
				else {
					/* Reserve the 5 byte push; the real offset is patched in below */
					buf = code;
					x86_push_imm (code, 0xf0f0f0f0);
					buf2 = code;
				}

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				if (cfg->compile_aot) {          
					/*
					 * Since the patches are generated by the back end,
					 * there is no way to generate a got_var at this point.
					 */
					if (!cfg->got_var) {
						/* Compute the GOT address in EAX via call/pop */
						x86_call_imm (code, 0);
						mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
						x86_pop_reg (code, X86_EAX);
						x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
					}
					else {
						if (cfg->got_var->opcode == OP_REGOFFSET)
							x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
						else
							got_reg = cfg->got_var->dreg;
					}
				}

				x86_push_imm (code, exc_class->type_token);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;
				if (cfg->compile_aot)
					x86_call_membase (code, got_reg, 0xf0f0f0f0);
				else
					x86_call_code (code, 0);
				/* Now the return address is known: patch the IP offset push */
				x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3574
/*
 * mono_arch_flush_icache:
 *
 *   Flush the instruction cache for the given code region. On x86 the
 * instruction cache is kept coherent with data writes by the hardware,
 * so no explicit flush is required.
 */
void
mono_arch_flush_icache (guint8 *code, gint size)
{
	/* not needed */
}
3580
/*
 * mono_arch_flush_register_windows:
 *
 *   No-op on x86: register windows are a SPARC/IA-64 concept.
 */
void
mono_arch_flush_register_windows (void)
{
}
3585
3586 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
3587
/*
 * setup_stack:
 *
 *   Record the current thread's stack boundaries in TLS and install an
 * alternate signal stack, so the SIGSEGV handler can run during a stack
 * overflow. Skipped under valgrind since it manages stacks itself.
 */
static void
setup_stack (MonoJitTlsData *tls)
{
	pthread_t self = pthread_self();
	pthread_attr_t attr;
	size_t stsize = 0;
	struct sigaltstack sa;
	guint8 *staddr = NULL;
	/* Address of a local: proves it lies inside the computed stack range */
	guint8 *current = (guint8*)&staddr;

	if (mono_running_on_valgrind ())
		return;

	/* Determine stack boundaries (API differs per platform) */
	pthread_attr_init( &attr );
#ifdef HAVE_PTHREAD_GETATTR_NP
	pthread_getattr_np( self, &attr );
#else
#ifdef HAVE_PTHREAD_ATTR_GET_NP
	pthread_attr_get_np( self, &attr );
#elif defined(sun)
	pthread_attr_getstacksize( &attr, &stsize );
#else
#error "Not implemented"
#endif
#endif
#ifndef sun
	pthread_attr_getstack( &attr, (void**)&staddr, &stsize );
#endif

	/* NOTE(review): on sun only stsize is filled in, so staddr stays NULL
	 * and this assert would fire — confirm the sun path is actually used */
	g_assert (staddr);

	g_assert ((current > staddr) && (current < staddr + stsize));

	tls->end_of_stack = staddr + stsize;

	/*
	 * threads created by nptl does not seem to have a guard page, and
	 * since the main thread is not created by us, we can't even set one.
	 * Increasing stsize fools the SIGSEGV signal handler into thinking this
	 * is a stack overflow exception.
	 */
	tls->stack_size = stsize + getpagesize ();

	/* Setup an alternate signal stack */
	tls->signal_stack = mmap (0, SIGNAL_STACK_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	tls->signal_stack_size = SIGNAL_STACK_SIZE;

	g_assert (tls->signal_stack);

	sa.ss_sp = tls->signal_stack;
	sa.ss_size = SIGNAL_STACK_SIZE;
	/* NOTE(review): POSIX specifies ss_flags = 0 to enable an alternate
	 * stack; SS_ONSTACK is accepted on Linux/BSD but is non-portable —
	 * confirm before changing */
	sa.ss_flags = SS_ONSTACK;
	sigaltstack (&sa, NULL);
}
3643
3644 #endif
3645
3646 /*
3647  * Support for fast access to the thread-local lmf structure using the GS
3648  * segment register on NPTL + kernel 2.6.x.
3649  */
3650
3651 static gboolean tls_offset_inited = FALSE;
3652
/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Initialize the cached TLS offsets/keys used for inline TLS access
 * (unless disabled via MONO_NO_TLS) and, where supported, set up the
 * thread's alternate signal stack for stack overflow handling.
 */
void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
	if (!tls_offset_inited) {
		if (!getenv ("MONO_NO_TLS")) {
#ifdef PLATFORM_WIN32
			/* 
			 * We need to init this multiple times, since when we are first called, the key might not
			 * be initialized yet.
			 * (tls_offset_inited is therefore deliberately left FALSE here.)
			 */
			appdomain_tls_offset = mono_domain_get_tls_key ();
			lmf_tls_offset = mono_get_jit_tls_key ();
			thread_tls_offset = mono_thread_get_tls_key ();

			/* Only 64 tls entries can be accessed using inline code */
			if (appdomain_tls_offset >= 64)
				appdomain_tls_offset = -1;
			if (lmf_tls_offset >= 64)
				lmf_tls_offset = -1;
			if (thread_tls_offset >= 64)
				thread_tls_offset = -1;
#else
			/* Offsets are stable on this platform, so init exactly once */
			tls_offset_inited = TRUE;
			appdomain_tls_offset = mono_domain_get_tls_offset ();
			lmf_tls_offset = mono_get_lmf_tls_offset ();
			thread_tls_offset = mono_thread_get_tls_offset ();
#endif
		}
	}		

#ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
	setup_stack (tls);
#endif
}
3687
/*
 * mono_arch_free_jit_tls_data:
 *
 *   Undo setup_stack (): disable the alternate signal stack for this
 * thread and unmap its memory.
 */
void
mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
{
#ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
	struct sigaltstack sa;

	/* Disable the alternate stack before releasing its memory */
	sa.ss_sp = tls->signal_stack;
	sa.ss_size = SIGNAL_STACK_SIZE;
	sa.ss_flags = SS_DISABLE;
	sigaltstack  (&sa, NULL);

	/* signal_stack may be NULL if setup_stack () was skipped (valgrind) */
	if (tls->signal_stack)
		munmap (tls->signal_stack, SIGNAL_STACK_SIZE);
#endif
}
3703
3704 void
3705 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
3706 {
3707
3708         /* add the this argument */
3709         if (this_reg != -1) {
3710                 MonoInst *this;
3711                 MONO_INST_NEW (cfg, this, OP_OUTARG);
3712                 this->type = this_type;
3713                 this->sreg1 = this_reg;
3714                 mono_bblock_add_inst (cfg->cbb, this);
3715         }
3716
3717         if (vt_reg != -1) {
3718                 CallInfo * cinfo = get_call_info (inst->signature, FALSE);
3719                 MonoInst *vtarg;
3720
3721                 if (cinfo->ret.storage == ArgValuetypeInReg) {
3722                         /*
3723                          * The valuetype is in EAX:EDX after the call, needs to be copied to
3724                          * the stack. Save the address here, so the call instruction can
3725                          * access it.
3726                          */
3727                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
3728                         vtarg->inst_destbasereg = X86_ESP;
3729                         vtarg->inst_offset = inst->stack_usage;
3730                         vtarg->sreg1 = vt_reg;
3731                         mono_bblock_add_inst (cfg->cbb, vtarg);
3732                 }
3733                 else {
3734                         MonoInst *vtarg;
3735                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
3736                         vtarg->type = STACK_MP;
3737                         vtarg->sreg1 = vt_reg;
3738                         mono_bblock_add_inst (cfg->cbb, vtarg);
3739                 }
3740
3741                 g_free (cinfo);
3742         }
3743 }
3744
3745
3746 MonoInst*
3747 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
3748 {
3749         MonoInst *ins = NULL;
3750
3751         if (cmethod->klass == mono_defaults.math_class) {
3752                 if (strcmp (cmethod->name, "Sin") == 0) {
3753                         MONO_INST_NEW (cfg, ins, OP_SIN);
3754                         ins->inst_i0 = args [0];
3755                 } else if (strcmp (cmethod->name, "Cos") == 0) {
3756                         MONO_INST_NEW (cfg, ins, OP_COS);
3757                         ins->inst_i0 = args [0];
3758                 } else if (strcmp (cmethod->name, "Tan") == 0) {
3759                         MONO_INST_NEW (cfg, ins, OP_TAN);
3760                         ins->inst_i0 = args [0];
3761                 } else if (strcmp (cmethod->name, "Atan") == 0) {
3762                         MONO_INST_NEW (cfg, ins, OP_ATAN);
3763                         ins->inst_i0 = args [0];
3764                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
3765                         MONO_INST_NEW (cfg, ins, OP_SQRT);
3766                         ins->inst_i0 = args [0];
3767                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
3768                         MONO_INST_NEW (cfg, ins, OP_ABS);
3769                         ins->inst_i0 = args [0];
3770                 }
3771 #if 0
3772                 /* OP_FREM is not IEEE compatible */
3773                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
3774                         MONO_INST_NEW (cfg, ins, OP_FREM);
3775                         ins->inst_i0 = args [0];
3776                         ins->inst_i1 = args [1];
3777                 }
3778 #endif
3779         } else if (cmethod->klass == mono_defaults.thread_class &&
3780                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
3781                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
3782         } else if(cmethod->klass->image == mono_defaults.corlib &&
3783                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
3784                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
3785
3786                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3787                         MonoInst *ins_iconst;
3788
3789                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3790                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3791                         ins_iconst->inst_c0 = 1;
3792
3793                         ins->inst_i0 = args [0];
3794                         ins->inst_i1 = ins_iconst;
3795                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3796                         MonoInst *ins_iconst;
3797
3798                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3799                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3800                         ins_iconst->inst_c0 = -1;
3801
3802                         ins->inst_i0 = args [0];
3803                         ins->inst_i1 = ins_iconst;
3804                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3805                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
3806
3807                         ins->inst_i0 = args [0];
3808                         ins->inst_i1 = args [1];
3809                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3810                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_I4);
3811
3812                         ins->inst_i0 = args [0];
3813                         ins->inst_i1 = args [1];
3814                 }
3815         }
3816
3817         return ins;
3818 }
3819
3820
3821 gboolean
3822 mono_arch_print_tree (MonoInst *tree, int arity)
3823 {
3824         return 0;
3825 }
3826
3827 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
3828 {
3829         MonoInst* ins;
3830         
3831         if (appdomain_tls_offset == -1)
3832                 return NULL;
3833
3834         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3835         ins->inst_offset = appdomain_tls_offset;
3836         return ins;
3837 }
3838
3839 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
3840 {
3841         MonoInst* ins;
3842
3843         if (thread_tls_offset == -1)
3844                 return NULL;
3845
3846         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3847         ins->inst_offset = thread_tls_offset;
3848         return ins;
3849 }
3850
3851 guint32
3852 mono_arch_get_patch_offset (guint8 *code)
3853 {
3854         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
3855                 return 2;
3856         else if ((code [0] == 0xba))
3857                 return 1;
3858         else if ((code [0] == 0x68))
3859                 /* push IMM */
3860                 return 1;
3861         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
3862                 /* push <OFFSET>(<REG>) */
3863                 return 2;
3864         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
3865                 /* call *<OFFSET>(<REG>) */
3866                 return 2;
3867         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
3868                 /* fldl <ADDR> */
3869                 return 2;
3870         else if ((code [0] == 0x58) && (code [1] == 0x05))
3871                 /* pop %eax; add <OFFSET>, %eax */
3872                 return 2;
3873         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
3874                 /* pop <REG>; add <OFFSET>, <REG> */
3875                 return 3;
3876         else {
3877                 g_assert_not_reached ();
3878                 return -1;
3879         }
3880 }
3881
/*
 * mono_arch_get_vcall_slot_addr:
 *
 *   Given CODE, the return address of an indirect call, and REGS, the saved
 * register values, decode the call instruction and return the address of
 * the vtable/interface slot which was called through. Returns NULL for
 * direct (0xe8) calls and unrecognized sequences.
 */
gpointer*
mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
{
	guint8 reg = 0;
	gint32 disp = 0;

	/* go to the start of the call instruction
	 *
	 * address_byte = (m << 6) | (o << 3) | reg
	 * call opcode: 0xff address_byte displacement
	 * 0xff m=1,o=2 imm8
	 * 0xff m=2,o=2 imm32
	 */
	code -= 6;
	if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
		/* call *disp8(<REG>): 3 byte form, found at code + 3 */
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
	} else {
		if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
			/* call *disp32(<REG>): 6 byte form, occupies the whole window */
			reg = code [1] & 0x07;
			disp = *((gint32*)(code + 2));
		} else if ((code [1] == 0xe8)) {
			/* direct call: no memory slot to return */
			return NULL;
		} else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
			/*
			 * This is a interface call: should check the above code can't catch it earlier 
			 * 8b 40 30   mov    0x30(%eax),%eax
			 * ff 10      call   *(%eax)
			 */
			disp = 0;
			reg = code [5] & 0x07;
		}
		else
			return NULL;
	}

	/* Slot address = saved register value + displacement */
	return (gpointer*)(((gint32)(regs [reg])) + disp);
}
3920
3921 gpointer* 
3922 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
3923 {
3924         guint8 reg = 0;
3925         gint32 disp = 0;
3926
3927         code -= 7;
3928         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
3929                 reg = x86_modrm_rm (code [1]);
3930                 disp = code [4];
3931
3932                 if (reg == X86_EAX)
3933                         return NULL;
3934                 else
3935                         return (gpointer*)(((gint32)(regs [reg])) + disp);
3936         }
3937
3938         return NULL;
3939 }