2 * tramp-x86.c: JIT trampoline code for x86
5 * Dietmar Maurer (dietmar@ximian.com)
7 * (C) 2001 Ximian, Inc.
13 #include <mono/metadata/appdomain.h>
14 #include <mono/metadata/metadata-internals.h>
15 #include <mono/metadata/marshal.h>
16 #include <mono/metadata/tabledefs.h>
17 #include <mono/metadata/mono-debug.h>
18 #include <mono/metadata/mono-debug-debugger.h>
19 #include <mono/arch/x86/x86-codegen.h>
21 #ifdef HAVE_VALGRIND_MEMCHECK_H
22 #include <valgrind/memcheck.h>
28 static guint8* nullified_class_init_trampoline;
31 * mono_arch_get_unbox_trampoline:
32 * @gsctx: the generic sharing context
34 * @addr: pointer to native code for @m
36 * when value type methods are called through the vtable we need to unbox the
37 * this argument. This method returns a pointer to a trampoline which does
38 * unboxing before calling the method
// Emits a small stub that adjusts the boxed `this` argument on the stack and
// then jumps to @addr, the native code of @m.
// NOTE(review): the declarations of `code`, `start` and `this_pos`, and the
// return statement, are not visible in this view -- confirm against the full file.
41 mono_arch_get_unbox_trampoline (MonoGenericSharingContext *gsctx, MonoMethod *m, gpointer addr)
45 MonoDomain *domain = mono_domain_get ();
// Methods whose return type is a struct are special-cased. The guarded
// statement is not visible here -- presumably it changes this_pos (TODO confirm).
47 if (MONO_TYPE_ISSTRUCT (mono_method_signature (m)->ret))
// Reserve 16 bytes from the domain's code manager while holding the domain lock.
50 mono_domain_lock (domain);
51 start = code = mono_code_manager_reserve (domain->code_mp, 16);
52 mono_domain_unlock (domain);
// Add sizeof (MonoObject) to the value stored at ESP + this_pos (the header
// comment describes this as unboxing `this`), then jump straight to @addr.
54 x86_alu_membase_imm (code, X86_ADD, X86_ESP, this_pos, sizeof (MonoObject));
55 x86_jump_code (code, addr);
// The emitted code has to be shorter than the 16 bytes reserved above.
56 g_assert ((code - start) < 16);
// Rewrites the 32-bit displacement of a call (0xe8) or jmp (0xe9) instruction
// so that the instruction transfers control to @addr.
// NOTE(review): the declarations of `buf` and `code`, the way `can_write` is
// used, and any adjustment of `code` and `orig_code` are on lines not visible
// in this view -- confirm against the full file.
62 mono_arch_patch_callsite (guint8 *method_start, guint8 *orig_code, guint8 *addr)
// Fetch 8 bytes around orig_code into buf through mono_breakpoint_clean_code ().
66 gboolean can_write = mono_breakpoint_clean_code (method_start, orig_code, 8, buf, sizeof (buf));
// The statement guarded by the valgrind check is not visible here.
69 if (mono_running_on_valgrind ())
72 /* go to the start of the call instruction
74 * address_byte = (m << 6) | (o << 3) | reg
75 * call opcode: 0xff address_byte displacement
// Opcode byte 0xe8: a call with a 32-bit relative displacement.
81 if ((code [1] == 0xe8)) {
// The opcode sits at orig_code + 1 and the displacement at orig_code + 2.
// The new displacement is the target minus the address just past the 5-byte
// instruction, written with a single InterlockedExchange.
83 InterlockedExchange ((gint32*)(orig_code + 2), (guint)addr - ((guint)orig_code + 1) - 5);
85 #ifdef HAVE_VALGRIND_MEMCHECK_H
86 /* Tell valgrind to recompile the patched code */
87 //VALGRIND_DISCARD_TRANSLATIONS (code + 2, code + 6);
90 } else if (code [1] == 0xe9) {
91 /* A PLT entry: jmp <DISP> */
// Same layout and the same displacement arithmetic as the call case.
93 InterlockedExchange ((gint32*)(orig_code + 2), (guint)addr - ((guint)orig_code + 1) - 5);
// Any other byte sequence is unexpected: print the first seven bytes and abort.
// (The `else` line introducing this branch is not visible here.)
95 printf ("Invalid trampoline sequence: %x %x %x %x %x %x %x\n", code [0], code [1], code [2], code [3],
96 code [4], code [5], code [6]);
97 g_assert_not_reached ();
// Retargets the PLT entry at @code so that it jumps to @addr.
// @code must point at a jmp with a 32-bit displacement (opcode 0xe9); this is asserted.
102 mono_arch_patch_plt_entry (guint8 *code, guint8 *addr)
104 /* A PLT entry: jmp <DISP> */
105 g_assert (code [0] == 0xe9);
// Nothing is patched when running under valgrind. Otherwise the displacement
// (target minus the address just past the 5-byte jmp) is stored at code + 1
// with a single InterlockedExchange.
107 if (!mono_running_on_valgrind ())
108 InterlockedExchange ((gint32*)(code + 1), (guint)addr - (guint)code - 5);
// Neutralises the class-init trampoline call found at @code.
// A direct call (0xe8) is overwritten in place in several steps; an indirect
// call made from AOT code has its vtable slot repointed to
// nullified_class_init_trampoline instead.
// @regs is only used to locate the vtable slot in the AOT case.
// NOTE(review): the declaration of `buf`, the use of `can_write`, and the
// assignments to `ops` are on lines not visible in this view.
112 mono_arch_nullify_class_init_trampoline (guint8 *code, gssize *regs)
// Fetch 6 bytes at code into buf through mono_breakpoint_clean_code ().
115 gboolean can_write = mono_breakpoint_clean_code (NULL, code, 6, buf, sizeof (buf));
// Case 1: direct call with a 32-bit relative displacement.
121 if (code [0] == 0xe8) {
// The in-place patching is skipped entirely under valgrind.
122 if (!mono_running_on_valgrind ()) {
125 * Thread safe code patching using the algorithm from the paper
126 * 'Practicing JUDO: Java Under Dynamic Optimizations'
129 * First atomically change the the first 2 bytes of the call to a
// Step 1: replace the first four bytes in one atomic exchange.
133 InterlockedExchange ((gint32*)code, ops);
135 /* Then change the other bytes to a nop */
140 /* Then atomically change the first 4 bytes to a nop as well */
142 InterlockedExchange ((gint32*)code, ops);
143 #ifdef HAVE_VALGRIND_MEMCHECK_H
144 /* FIXME: the calltree skin trips on the self modifying code above */
146 /* Tell valgrind to recompile the patched code */
147 //VALGRIND_DISCARD_TRANSLATIONS (code, code + 8);
// Case 2: first byte is 0x90 or 0xeb, i.e. one of the patching steps above
// has already been applied.
150 } else if (code [0] == 0x90 || code [0] == 0xeb) {
151 /* Already changed by another thread */
// Case 3: opcode 0xff with a ModRM reg field of 2, an indirect call.
153 } else if ((code [-1] == 0xff) && (x86_modrm_reg (code [0]) == 0x2)) {
154 /* call *<OFFSET>(<REG>) -> Call made from AOT code */
155 gpointer *vtable_slot;
// Locate the slot the call went through and point it at the nullified
// trampoline, so the call site itself is left unmodified.
157 vtable_slot = mono_arch_get_vcall_slot_addr (code + 5, (gpointer*)regs);
158 g_assert (vtable_slot);
160 *vtable_slot = nullified_class_init_trampoline;
// Anything else is unexpected: print the first seven bytes and abort.
// (The `else` line introducing this branch is not visible here.)
162 printf ("Invalid trampoline sequence: %x %x %x %x %x %x %x\n", code [0], code [1], code [2], code [3],
163 code [4], code [5], code [6]);
164 g_assert_not_reached ();
// Overwrites the PLT entry at @code in stages, using two atomic 32-bit
// exchanges on its first four bytes with a plain update of the remaining
// bytes in between. Nothing is done when running under valgrind.
// NOTE(review): the declaration of `ops`, the values assigned to it, and the
// statement that changes "the other bytes" are on lines not visible in this view.
169 mono_arch_nullify_plt_entry (guint8 *code)
171 if (!mono_running_on_valgrind ()) {
// First exchange: replaces the first four bytes of the entry.
175 InterlockedExchange ((gint32*)code, ops);
177 /* Then change the other bytes to a nop */
182 /* Change the first byte to a nop */
// Second exchange: writes the final contents of the first four bytes.
184 InterlockedExchange ((gint32*)code, ops);
// Generates the shared trampoline for @tramp_type into a 256-byte buffer taken
// from the global code manager. The emitted code saves the registers, links a
// new LMF entry, calls the C handler returned by mono_get_trampoline_func (),
// then undoes the stack changes and leaves.
// NOTE(review): this view omits the statements that maintain `pushed_args`
// (the asserts below expect 16 and later -1), the declaration of `jump`,
// several `else` lines and the return statement -- confirm against the full file.
189 mono_arch_create_trampoline_code (MonoTrampolineType tramp_type)
191 guint8 *buf, *code, *tramp;
192 int pushed_args, pushed_args_caller_saved;
194 code = buf = mono_global_codeman_reserve (256);
196 /* Note that there is a single argument to the trampoline
197 * and it is stored at: esp + pushed_args * sizeof (gpointer)
198 * the ret address is at: esp + (pushed_args + 1) * sizeof (gpointer)
201 /* If this is a generic class init the argument is not on the
202 * stack yet but in MONO_ARCH_VTABLE_REG. We first check
203 * whether the vtable is already initialized in which case we
204 * just return. Otherwise we push it and continue.
206 if (tramp_type == MONO_TRAMPOLINE_GENERIC_CLASS_INIT) {
// Location of the MonoVTable `initialized` bit, filled in by
// mono_marshal_find_bitfield_offset () below.
207 static int byte_offset = -1;
208 static guint8 bitmask;
213 mono_marshal_find_bitfield_offset (MonoVTable, initialized, &byte_offset, &bitmask);
// Test that bit in the vtable pointed to by MONO_ARCH_VTABLE_REG.
215 x86_test_membase_imm (buf, MONO_ARCH_VTABLE_REG, byte_offset, bitmask);
// Branch taken when the bit is clear. The placeholder displacement is fixed
// up by the x86_patch () call below.
217 x86_branch8 (buf, X86_CC_Z, -1, 1);
221 x86_patch (jump, buf);
// Not initialized yet: push the vtable so it becomes the trampoline argument.
222 x86_push_reg (buf, MONO_ARCH_VTABLE_REG);
225 /* Put all registers into an array on the stack
226 * If this code is changed, make sure to update the offset value in
227 * mono_arch_find_this_argument () in mini-x86.c.
// Pushed from EDI down to EAX, so EAX ends up at the lowest address. Later
// code indexes this array by register constant (X86_ECX, X86_EDX).
229 x86_push_reg (buf, X86_EDI);
230 x86_push_reg (buf, X86_ESI);
231 x86_push_reg (buf, X86_EBP);
232 x86_push_reg (buf, X86_ESP);
233 x86_push_reg (buf, X86_EBX);
234 x86_push_reg (buf, X86_EDX);
235 x86_push_reg (buf, X86_ECX);
236 x86_push_reg (buf, X86_EAX);
// Eight register slots are on the stack at this point.
238 pushed_args_caller_saved = pushed_args = 8;
240 /* Align stack on apple */
241 x86_alu_reg_imm (buf, X86_SUB, X86_ESP, 4);
247 /* save the IP (caller ip) */
// A JUMP trampoline stores 0 here. Otherwise the return address is copied
// from above the saved registers. (The `else` line is not visible here.)
248 if (tramp_type == MONO_TRAMPOLINE_JUMP)
249 x86_push_imm (buf, 0);
251 x86_push_membase (buf, X86_ESP, (pushed_args + 1) * sizeof (gpointer));
// Registers stored next; they are popped again in reverse order further down.
255 x86_push_reg (buf, X86_EBP);
256 x86_push_reg (buf, X86_ESI);
257 x86_push_reg (buf, X86_EDI);
258 x86_push_reg (buf, X86_EBX);
263 x86_push_reg (buf, X86_ESP);
264 /* Adjust ESP so it points to the previous frame */
265 x86_alu_membase_imm (buf, X86_ADD, X86_ESP, 0, (pushed_args + 2) * 4);
269 /* save method info */
// For JIT and JUMP trampolines the trampoline argument is copied in as the
// method info; other types store 0. (The `else` line is not visible here.)
270 if ((tramp_type == MONO_TRAMPOLINE_JIT) || (tramp_type == MONO_TRAMPOLINE_JUMP))
271 x86_push_membase (buf, X86_ESP, pushed_args * sizeof (gpointer));
273 x86_push_imm (buf, 0);
277 /* On apple, the stack is correctly aligned to 16 bytes because pushed_args is
278 * 16 and there is the extra trampoline arg + the return ip pushed by call
279 * FIXME: Note that if an exception happens while some args are pushed
280 * on the stack, the stack will be misaligned.
282 g_assert (pushed_args == 16);
284 /* get the address of lmf for the current thread */
285 x86_call_code (buf, mono_get_lmf_addr);
// Save the returned LMF address (EAX) so it can be popped back during cleanup.
287 x86_push_reg (buf, X86_EAX);
288 /* push *lmf (previous_lmf) */
289 x86_push_membase (buf, X86_EAX, 0);
290 /* Signal to mono_arch_find_jit_info () that this is a trampoline frame */
// The marker is a +1 on the previous_lmf value just pushed; it is subtracted
// again before previous_lmf is restored.
291 x86_alu_membase_imm (buf, X86_ADD, X86_ESP, 0, 1);
// Store ESP through the LMF address in EAX, making this frame the current LMF.
293 x86_mov_membase_reg (buf, X86_EAX, 0, X86_ESP, 4);
298 /* starting the call sequence */
// Four values are pushed for the handler: a placeholder 0, the method info,
// the return address (0 for JUMP) and the address of the register array.
300 /* FIXME: Push the trampoline address */
301 x86_push_imm (buf, 0);
305 /* push the method info */
306 x86_push_membase (buf, X86_ESP, pushed_args * sizeof (gpointer));
310 /* push the return address onto the stack */
311 if (tramp_type == MONO_TRAMPOLINE_JUMP)
312 x86_push_imm (buf, 0);
314 x86_push_membase (buf, X86_ESP, (pushed_args + 1) * sizeof (gpointer));
316 /* push the address of the register array */
317 x86_lea_membase (buf, X86_EAX, X86_ESP, (pushed_args - 8) * sizeof (gpointer));
318 x86_push_reg (buf, X86_EAX);
323 /* check the stack is aligned after the ret ip is pushed */
324 /*x86_mov_reg_reg (buf, X86_EDX, X86_ESP, 4);
325 x86_alu_reg_imm (buf, X86_AND, X86_EDX, 15);
326 x86_alu_reg_imm (buf, X86_CMP, X86_EDX, 0);
327 x86_branch_disp (buf, X86_CC_Z, 3, FALSE);
328 x86_breakpoint (buf);*/
// Call the C handler for this trampoline type, then drop its four arguments.
331 tramp = (guint8*)mono_get_trampoline_func (tramp_type);
332 x86_call_code (buf, tramp);
334 x86_alu_reg_imm (buf, X86_ADD, X86_ESP, 4*4);
338 /* Check for thread interruption */
339 /* This is not perf critical code so no need to check the interrupt flag */
// EAX is saved and restored around the checkpoint call so its value survives.
340 x86_push_reg (buf, X86_EAX);
341 x86_call_code (buf, (guint8*)mono_thread_force_interruption_checkpoint);
342 x86_pop_reg (buf, X86_EAX);
346 /* ebx = previous_lmf */
347 x86_pop_reg (buf, X86_EBX);
// Remove the +1 trampoline-frame marker added when previous_lmf was pushed.
349 x86_alu_reg_imm (buf, X86_SUB, X86_EBX, 1);
// EDI = the LMF address that was pushed after mono_get_lmf_addr () returned.
352 x86_pop_reg (buf, X86_EDI);
355 /* *(lmf) = previous_lmf */
356 x86_mov_membase_reg (buf, X86_EDI, 0, X86_EBX, 4);
358 /* discard method info */
359 x86_pop_reg (buf, X86_ESI);
// Second discarded slot: the adjusted ESP value pushed before the method info.
363 x86_pop_reg (buf, X86_ESI);
366 /* restore caller saved regs */
367 x86_pop_reg (buf, X86_EBX);
368 x86_pop_reg (buf, X86_EDI);
369 x86_pop_reg (buf, X86_ESI);
370 x86_pop_reg (buf, X86_EBP);
374 /* discard save IP */
375 x86_alu_reg_imm (buf, X86_ADD, X86_ESP, 4);
378 /* restore LMF end */
380 /* Restore caller saved registers */
// ECX and EDX are reloaded from the register array, indexed by register constant.
381 x86_mov_reg_membase (buf, X86_ECX, X86_ESP, (pushed_args - pushed_args_caller_saved + X86_ECX) * 4, 4);
382 x86_mov_reg_membase (buf, X86_EDX, X86_ESP, (pushed_args - pushed_args_caller_saved + X86_EDX) * 4, 4);
384 /* Pop saved reg array + stack align + method ptr */
385 x86_alu_reg_imm (buf, X86_ADD, X86_ESP, 10 * 4);
389 /* We've popped one more stack item than we've pushed (the
390 method ptr argument), so we must end up at -1. */
391 g_assert (pushed_args == -1);
// These three types take the guarded exit, whose statement is not visible
// here (presumably a plain return -- TODO confirm). All other types jump to
// the address held in EAX.
393 if (tramp_type == MONO_TRAMPOLINE_CLASS_INIT ||
394 tramp_type == MONO_TRAMPOLINE_GENERIC_CLASS_INIT ||
395 tramp_type == MONO_TRAMPOLINE_RGCTX_LAZY_FETCH)
398 /* call the compiled method */
399 x86_jump_reg (buf, X86_EAX);
// The generated code must fit in the 256 bytes reserved at the top.
401 g_assert ((buf - code) <= 256);
403 if (tramp_type == MONO_TRAMPOLINE_CLASS_INIT) {
404 /* Initialize the nullified class init trampoline used in the AOT case */
// A separate 16-byte stub is reserved for it; the code emitted into the stub
// is on lines not visible here.
405 nullified_class_init_trampoline = buf = mono_global_codeman_reserve (16);
// Bytes reserved for one specific trampoline: a push of an immediate followed
// by a jmp. The assert in the function below checks that the emitted code fits.
412 #define TRAMPOLINE_SIZE 10
// Creates a per-argument trampoline in @domain's code memory: it pushes @arg1
// and jumps to the shared trampoline code for @tramp_type. The number of bytes
// emitted is stored through @code_len.
// NOTE(review): the return statement, and any guard around the write to
// *code_len, are on lines not visible in this view -- confirm before passing NULL.
415 mono_arch_create_specific_trampoline (gpointer arg1, MonoTrampolineType tramp_type, MonoDomain *domain, guint32 *code_len)
417 guint8 *code, *buf, *tramp;
// Shared trampoline body that the stub will jump to.
419 tramp = mono_get_trampoline_code (tramp_type);
// Reserve TRAMPOLINE_SIZE bytes, 4-byte aligned, while holding the domain lock.
421 mono_domain_lock (domain);
422 code = buf = mono_code_manager_reserve_align (domain->code_mp, TRAMPOLINE_SIZE, 4);
423 mono_domain_unlock (domain);
425 x86_push_imm (buf, arg1);
426 x86_jump_code (buf, tramp);
427 g_assert ((buf - code) <= TRAMPOLINE_SIZE);
// Flush the instruction cache for the bytes just written.
429 mono_arch_flush_icache (code, buf - code);
432 *code_len = buf - code;
// Builds a trampoline that tries to load the runtime generic context entry
// described by @slot by following a chain of pointers. If any pointer on the
// way (or the entry itself) is NULL, it falls back to the shared
// MONO_TRAMPOLINE_RGCTX_LAZY_FETCH trampoline with @slot in EAX.
// NOTE(review): several declarations (code, buf, i, depth, index, mrgctx,
// tramp_size), the loop exit, the guards that depend on `mrgctx`, the return
// instruction of the fast path and the function's return statement are on
// lines not visible in this view -- confirm against the full file.
438 mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot)
440 guint8 *tramp = mono_get_trampoline_code (MONO_TRAMPOLINE_RGCTX_LAZY_FETCH);
// Addresses of the emitted "is it NULL?" branches, patched once the fallback
// path's address is known.
442 guint8 **rgctx_null_jumps;
// Split the slot into its "method rgctx" flag and its index.
450 mrgctx = MONO_RGCTX_SLOT_IS_MRGCTX (slot);
451 index = MONO_RGCTX_SLOT_INDEX (slot);
// Shifts the index by the size of MonoMethodRuntimeGenericContext in pointer
// units; presumably guarded by a check of `mrgctx` on a hidden line (TODO confirm).
453 index += sizeof (MonoMethodRuntimeGenericContext) / sizeof (gpointer);
// Find how many array levels must be traversed. The loop has no condition of
// its own; its exit and the index adjustment are on lines not visible here.
454 for (depth = 0; ; ++depth) {
455 int size = mono_class_rgctx_get_array_size (depth, mrgctx);
457 if (index < size - 1)
// Code size estimate: a fixed part plus 6 bytes per level. It is checked by
// the assert at the end.
462 tramp_size = 36 + 6 * depth;
464 code = buf = mono_global_codeman_reserve (tramp_size);
// depth + 2 entries: [0] for the rgctx pointer check, [1..depth] for each
// level, [depth + 1] for the slot itself.
466 rgctx_null_jumps = g_malloc (sizeof (guint8*) * (depth + 2));
468 /* load vtable/mrgctx ptr */
469 x86_mov_reg_membase (buf, X86_EAX, X86_ESP, 4, 4);
471 /* load rgctx ptr from vtable */
472 x86_mov_reg_membase (buf, X86_EAX, X86_EAX, G_STRUCT_OFFSET (MonoVTable, runtime_generic_context), 4);
473 /* is the rgctx ptr null? */
474 x86_test_reg_reg (buf, X86_EAX, X86_EAX);
475 /* if yes, jump to actual trampoline */
476 rgctx_null_jumps [0] = buf;
477 x86_branch8 (buf, X86_CC_Z, -1, 1);
480 for (i = 0; i < depth; ++i) {
481 /* load ptr to next array */
// For a method rgctx the first array pointer is read from an offset of
// sizeof (MonoMethodRuntimeGenericContext); otherwise it is read from offset 0.
// (The `else` line is not visible here.)
482 if (mrgctx && i == 0)
483 x86_mov_reg_membase (buf, X86_EAX, X86_EAX, sizeof (MonoMethodRuntimeGenericContext), 4);
485 x86_mov_reg_membase (buf, X86_EAX, X86_EAX, 0, 4);
486 /* is the ptr null? */
487 x86_test_reg_reg (buf, X86_EAX, X86_EAX);
488 /* if yes, jump to actual trampoline */
489 rgctx_null_jumps [i + 1] = buf;
490 x86_branch8 (buf, X86_CC_Z, -1, 1);
// Load the requested entry. The + 1 skips the first element of the array.
494 x86_mov_reg_membase (buf, X86_EAX, X86_EAX, sizeof (gpointer) * (index + 1), 4);
495 /* is the slot null? */
496 x86_test_reg_reg (buf, X86_EAX, X86_EAX);
497 /* if yes, jump to actual trampoline */
498 rgctx_null_jumps [depth + 1] = buf;
499 x86_branch8 (buf, X86_CC_Z, -1, 1);
500 /* otherwise return */
// Point every recorded NULL branch at the fallback code that starts here.
// Entry 0 is skipped for a method rgctx, presumably because its check is only
// emitted in the non-mrgctx case (TODO confirm).
503 for (i = mrgctx ? 1 : 0; i <= depth + 1; ++i)
504 x86_patch (rgctx_null_jumps [i], buf);
506 g_free (rgctx_null_jumps);
509 * our stack looks like this (tos on top):
515 * the trampoline code expects it to look like this:
521 * whereas our caller expects to still have one argument on
522 * the stack when we return, so we transform the stack into
530 * which actually only requires us to push the vtable ptr, and
531 * the "old" vtable ptr becomes the dummy.
// Duplicate the argument at ESP + 4, hand the slot over in EAX (read back by
// mono_arch_get_rgctx_lazy_fetch_offset ()), and enter the shared trampoline.
534 x86_push_membase (buf, X86_ESP, 4);
536 x86_mov_reg_imm (buf, X86_EAX, slot);
537 x86_jump_code (buf, tramp);
539 mono_arch_flush_icache (code, buf - code);
// Checks that the size estimate above was large enough.
541 g_assert (buf - code <= tramp_size);
// Returns the value saved for EAX in @regs, truncated to 32 bits. The rgctx
// lazy fetch trampoline loads the requested slot into EAX before jumping to
// the shared trampoline, so this recovers that slot.
547 mono_arch_get_rgctx_lazy_fetch_offset (gpointer *regs)
549 return (guint32)(regs [X86_EAX]);
// Overwrites the beginning of the native code of @ji so that entering the
// method pushes @func_arg and calls @func.
// NOTE(review): the function may continue past the last line visible in this
// view (for example stack cleanup after the call) -- confirm against the full file.
553 mono_arch_invalidate_method (MonoJitInfo *ji, void *func, gpointer func_arg)
555 /* FIXME: This is not thread safe */
// Emission starts directly at the method's first instruction.
556 guint8 *code = ji->code_start;
558 x86_push_imm (code, func_arg);
559 x86_call_code (code, (guint8*)func);