2 * tramp-amd64.c: JIT trampoline code for amd64
5 * Dietmar Maurer (dietmar@ximian.com)
6 * Zoltan Varga (vargaz@gmail.com)
8 * (C) 2001 Ximian, Inc.
9 * Copyright 2003-2011 Novell, Inc (http://www.novell.com)
10 * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
11 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
17 #include <mono/metadata/abi-details.h>
18 #include <mono/metadata/appdomain.h>
19 #include <mono/metadata/marshal.h>
20 #include <mono/metadata/tabledefs.h>
21 #include <mono/metadata/mono-debug-debugger.h>
22 #include <mono/metadata/profiler-private.h>
23 #include <mono/metadata/gc-internals.h>
24 #include <mono/arch/amd64/amd64-codegen.h>
26 #include <mono/utils/memcheck.h>
29 #include "mini-amd64.h"
30 #include "debugger-agent.h"
32 #define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
34 #define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f))
38 * mono_arch_get_unbox_trampoline:
40 * @addr: pointer to native code for @m
42 * when value type methods are called through the vtable we need to unbox the
43 * this argument. This method returns a pointer to a trampoline which does
44 * unboxing before calling the method
47 mono_arch_get_unbox_trampoline (MonoMethod *m, gpointer addr)
51 int this_reg, size = 20;
53 MonoDomain *domain = mono_domain_get ();
/* Which register carries 'this' for this ABI (queried, not hard-coded) */
55 this_reg = mono_arch_get_this_arg_reg (NULL);
57 start = code = (guint8 *)mono_domain_code_reserve (domain, size);
59 unwind_ops = mono_arch_get_cie_program ();
/* Skip the MonoObject header so the callee receives a pointer to the unboxed value */
61 amd64_alu_reg_imm (code, X86_ADD, this_reg, sizeof (MonoObject));
62 /* FIXME: Optimize this */
/* Load the target through RAX and tail-jump; RAX is scratch at call boundaries */
63 amd64_mov_reg_imm (code, AMD64_RAX, addr);
64 amd64_jump_reg (code, AMD64_RAX);
65 g_assert ((code - start) < size);
67 mono_arch_flush_icache (start, code - start);
68 mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_UNBOX_TRAMPOLINE, m);
70 mono_tramp_info_register (mono_tramp_info_create (NULL, start, code - start, NULL, unwind_ops), domain);
76 * mono_arch_get_static_rgctx_trampoline:
78 * Create a trampoline which sets RGCTX_REG to MRGCTX, then jumps to ADDR.
81 mono_arch_get_static_rgctx_trampoline (MonoMethod *m, MonoMethodRuntimeGenericContext *mrgctx, gpointer addr)
87 MonoDomain *domain = mono_domain_get ();
/* Buffer length depends on whether ADDR fits in 32 bits (smaller encoding).
   NOTE(review): the branch bodies selecting buf_len appear elided from this view. */
89 #ifdef MONO_ARCH_NOMAP32BIT
92 /* AOTed code could still have a non-32 bit address */
93 if ((((guint64)addr) >> 32) == 0)
99 start = code = (guint8 *)mono_domain_code_reserve (domain, buf_len);
101 unwind_ops = mono_arch_get_cie_program ();
/* mov $mrgctx, %rgctx_reg ; jmp addr */
103 amd64_mov_reg_imm (code, MONO_ARCH_RGCTX_REG, mrgctx);
104 amd64_jump_code (code, addr);
105 g_assert ((code - start) < buf_len);
107 mono_arch_flush_icache (start, code - start);
108 mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_GENERICS_TRAMPOLINE, NULL);
110 mono_tramp_info_register (mono_tramp_info_create (NULL, start, code - start, NULL, unwind_ops), domain);
114 #endif /* !DISABLE_JIT */
117 // Workaround lack of Valgrind support for 64-bit Windows
/* Defined to nothing so the VALGRIND_DISCARD_TRANSLATIONS call sites below
   compile where <valgrind/memcheck.h> does not provide it. */
118 #define VALGRIND_DISCARD_TRANSLATIONS(...)
122 * mono_arch_patch_callsite:
124 * Patch the callsite whose address is given by ORIG_CODE so it calls ADDR. ORIG_CODE
125 * points to the pc right after the call.
128 mono_arch_patch_callsite (guint8 *method_start, guint8 *orig_code, guint8 *addr)
/* Get a breakpoint-free copy of the 14 bytes preceding orig_code so the
   instruction-pattern checks below see the real code. */
132 gboolean can_write = mono_breakpoint_clean_code (method_start, orig_code, 14, buf, sizeof (buf));
136 /* mov 64-bit imm into r11 (followed by call reg?) or direct call*/
137 if (((code [-13] == 0x49) && (code [-12] == 0xbb)) || (code [-5] == 0xe8)) {
138 if (code [-5] != 0xe8) {
/* mov r11, imm64 form: atomically overwrite the 8-byte immediate */
140 InterlockedExchangePointer ((gpointer*)(orig_code - 11), addr);
141 VALGRIND_DISCARD_TRANSLATIONS (orig_code - 11, sizeof (gpointer));
/* call rel32 form: check the displacement to ADDR fits in +/-2^30 */
144 gboolean disp_32bit = ((((gint64)addr - (gint64)orig_code)) < (1 << 30)) && ((((gint64)addr - (gint64)orig_code)) > -(1 << 30));
146 if ((((guint64)(addr)) >> 32) != 0 && !disp_32bit) {
148 * This might happen with LLVM or when calling AOTed code. Create a thunk.
150 guint8 *thunk_start, *thunk_code;
/* Thunk layout: jmp *0(%rip) followed by the 8-byte target address.
   NOTE(review): the line advancing thunk_code past the jump and redirecting
   ADDR to the thunk appears elided from this view — confirm against upstream. */
152 thunk_start = thunk_code = (guint8 *)mono_domain_code_reserve (mono_domain_get (), 32);
153 amd64_jump_membase (thunk_code, AMD64_RIP, 0);
154 *(guint64*)thunk_code = (guint64)addr;
156 g_assert ((((guint64)(addr)) >> 32) == 0);
157 mono_arch_flush_icache (thunk_start, thunk_code - thunk_start);
158 mono_profiler_code_buffer_new (thunk_start, thunk_code - thunk_start, MONO_PROFILER_CODE_BUFFER_HELPER, NULL);
/* Atomically rewrite the rel32 displacement of the call */
161 InterlockedExchange ((gint32*)(orig_code - 4), ((gint64)addr - (gint64)orig_code));
162 VALGRIND_DISCARD_TRANSLATIONS (orig_code - 5, 4);
166 else if ((code [-7] == 0x41) && (code [-6] == 0xff) && (code [-5] == 0x15)) {
167 /* call *<OFFSET>(%rip) */
/* Indirect call through a GOT slot: patch the slot, not the code */
168 gpointer *got_entry = (gpointer*)((guint8*)orig_code + (*(guint32*)(orig_code - 4)));
170 InterlockedExchangePointer (got_entry, addr);
171 VALGRIND_DISCARD_TRANSLATIONS (orig_code - 5, sizeof (gpointer));
178 mono_arch_create_llvm_native_thunk (MonoDomain *domain, guint8 *addr)
181 * The caller is LLVM code and the call displacement might exceed 32 bits. We can't determine the caller address, so
182 * we add a thunk every time.
183 * Since the caller is also allocated using the domain code manager, hopefully the displacement will fit into 32 bits.
184 * FIXME: Avoid this if possible if !MONO_ARCH_NOMAP32BIT and ADDR is 32 bits.
186 guint8 *thunk_start, *thunk_code;
/* Thunk layout: jmp *0(%rip) immediately followed by the 8-byte target.
   NOTE(review): the line advancing thunk_code past the jump and the return of
   thunk_start appear elided from this view — confirm against upstream. */
188 thunk_start = thunk_code = (guint8 *)mono_domain_code_reserve (mono_domain_get (), 32);
189 amd64_jump_membase (thunk_code, AMD64_RIP, 0);
190 *(guint64*)thunk_code = (guint64)addr;
192 mono_arch_flush_icache (thunk_start, thunk_code - thunk_start);
193 mono_profiler_code_buffer_new (thunk_start, thunk_code - thunk_start, MONO_PROFILER_CODE_BUFFER_HELPER, NULL);
196 #endif /* !DISABLE_JIT */
/* Patch a PLT entry (jmp *<disp>(%rip)) to jump to ADDR by atomically
   updating the jump table slot the entry reads through. */
199 mono_arch_patch_plt_entry (guint8 *code, gpointer *got, mgreg_t *regs, guint8 *addr)
202 gpointer *plt_jump_table_entry;
204 /* A PLT entry: jmp *<DISP>(%rip) */
205 g_assert (code [0] == 0xff);
206 g_assert (code [1] == 0x25);
/* rel32 displacement encoded after the 2 opcode bytes */
208 disp = *(gint32*)(code + 2);
/* RIP-relative: base is the address of the instruction following the 6-byte jmp */
210 plt_jump_table_entry = (gpointer*)(code + 6 + disp);
212 InterlockedExchangePointer (plt_jump_table_entry, addr);
/* Diagnostic landing pad called from the generic trampoline when its
   runtime stack-alignment check fails; prints the trampoline type and aborts. */
217 stack_unaligned (MonoTrampolineType tramp_type)
219 printf ("%d\n", tramp_type);
220 g_assert_not_reached ();
/* Emit the per-type generic trampoline: saves the full register state into a
   MonoContext/MonoLMFTramp frame, recovers the trampoline argument from the
   calling instruction stream, calls the C trampoline function, then restores
   state and either returns or jumps to the compiled code.
   NOTE(review): this chunk appears partially elided (several brace/declaration
   lines are missing); comments below describe only what the visible lines show. */
224 mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInfo **info, gboolean aot)
227 guint8 *buf, *code, *tramp, *br [2], *r11_save_code, *after_r11_save_code, *br_ex_check;
228 int i, lmf_offset, offset, res_offset, arg_offset, rax_offset, ex_offset, tramp_offset, ctx_offset, saved_regs_offset;
229 int r11_save_offset, saved_fpregs_offset, rbp_offset, framesize, orig_rsp_to_rbp_offset, cfa_offset;
231 GSList *unwind_ops = NULL;
232 MonoJumpInfo *ji = NULL;
233 const guint kMaxCodeSize = 630;
235 if (tramp_type == MONO_TRAMPOLINE_JUMP || tramp_type == MONO_TRAMPOLINE_HANDLER_BLOCK_GUARD)
240 code = buf = (guint8 *)mono_global_codeman_reserve (kMaxCodeSize);
242 /* Compute stack frame size and offsets */
/* All offsets below are negative displacements from RBP within the frame */
244 rbp_offset = -offset;
246 offset += sizeof(mgreg_t);
247 rax_offset = -offset;
249 offset += sizeof(mgreg_t);
252 offset += sizeof(mgreg_t);
253 r11_save_offset = -offset;
255 offset += sizeof(mgreg_t);
256 tramp_offset = -offset;
258 offset += sizeof(gpointer);
259 arg_offset = -offset;
261 offset += sizeof(mgreg_t);
262 res_offset = -offset;
/* Room for a full MonoContext; greg/freg arrays are addressed via struct offsets */
264 offset += sizeof (MonoContext);
265 ctx_offset = -offset;
266 saved_regs_offset = ctx_offset + MONO_STRUCT_OFFSET (MonoContext, gregs);
267 saved_fpregs_offset = ctx_offset + MONO_STRUCT_OFFSET (MonoContext, fregs);
269 offset += sizeof (MonoLMFTramp);
270 lmf_offset = -offset;
273 /* Reserve space where the callee can save the argument registers */
274 offset += 4 * sizeof (mgreg_t);
277 framesize = ALIGN_TO (offset, MONO_ARCH_FRAME_ALIGNMENT);
279 // CFA = sp + 16 (the trampoline address is on the stack)
281 mono_add_unwind_op_def_cfa (unwind_ops, code, buf, AMD64_RSP, 16);
282 // IP saved at CFA - 8
283 mono_add_unwind_op_offset (unwind_ops, code, buf, AMD64_RIP, -8);
285 orig_rsp_to_rbp_offset = 0;
286 r11_save_code = code;
287 /* Reserve space for the mov_membase_reg to save R11 */
289 after_r11_save_code = code;
291 /* Pop the return address off the stack */
292 amd64_pop_reg (code, AMD64_R11);
293 orig_rsp_to_rbp_offset += sizeof(mgreg_t);
295 cfa_offset -= sizeof(mgreg_t);
296 mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset);
299 * Allocate a new stack frame
301 amd64_push_reg (code, AMD64_RBP);
302 cfa_offset += sizeof(mgreg_t);
303 mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset);
304 mono_add_unwind_op_offset (unwind_ops, code, buf, AMD64_RBP, - cfa_offset);
306 orig_rsp_to_rbp_offset -= sizeof(mgreg_t);
307 amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof(mgreg_t));
308 mono_add_unwind_op_def_cfa_reg (unwind_ops, code, buf, AMD64_RBP);
309 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, framesize);
311 /* Compute the trampoline address from the return address */
313 /* 7 = length of call *<offset>(rip) */
314 amd64_alu_reg_imm (code, X86_SUB, AMD64_R11, 7);
316 /* 5 = length of amd64_call_membase () */
317 amd64_alu_reg_imm (code, X86_SUB, AMD64_R11, 5);
/* Stash the computed trampoline address for the argument decoding below */
319 amd64_mov_membase_reg (code, AMD64_RBP, tramp_offset, AMD64_R11, sizeof(gpointer));
321 /* Save all registers */
322 for (i = 0; i < AMD64_NREG; ++i) {
323 if (i == AMD64_RBP) {
324 /* RAX is already saved */
325 amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RBP, rbp_offset, sizeof(mgreg_t));
326 amd64_mov_membase_reg (code, AMD64_RBP, saved_regs_offset + (i * sizeof(mgreg_t)), AMD64_RAX, sizeof(mgreg_t));
327 } else if (i == AMD64_RIP) {
/* Caller's return address sits at RBP+8 after the frame setup */
329 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, 8, sizeof(gpointer));
331 amd64_mov_reg_imm (code, AMD64_R11, 0);
332 amd64_mov_membase_reg (code, AMD64_RBP, saved_regs_offset + (i * sizeof(mgreg_t)), AMD64_R11, sizeof(mgreg_t));
333 } else if (i == AMD64_RSP) {
/* Reconstruct the caller's RSP: skip this frame plus the two pushed slots */
334 amd64_mov_reg_reg (code, AMD64_R11, AMD64_RSP, sizeof(mgreg_t));
335 amd64_alu_reg_imm (code, X86_ADD, AMD64_R11, framesize + 16);
336 amd64_mov_membase_reg (code, AMD64_RBP, saved_regs_offset + (i * sizeof(mgreg_t)), AMD64_R11, sizeof(mgreg_t));
337 } else if (i != AMD64_R11) {
338 amd64_mov_membase_reg (code, AMD64_RBP, saved_regs_offset + (i * sizeof(mgreg_t)), i, sizeof(mgreg_t));
340 /* We have to save R11 right at the start of
341 the trampoline code because it's used as a
343 /* This happens before the frame is set up, so it goes into the redzone */
344 amd64_mov_membase_reg (r11_save_code, AMD64_RSP, r11_save_offset + orig_rsp_to_rbp_offset, i, sizeof(mgreg_t));
345 g_assert (r11_save_code == after_r11_save_code);
347 /* Copy from the save slot into the register array slot */
348 amd64_mov_reg_membase (code, i, AMD64_RSP, r11_save_offset + orig_rsp_to_rbp_offset, sizeof(mgreg_t));
349 amd64_mov_membase_reg (code, AMD64_RBP, saved_regs_offset + (i * sizeof(mgreg_t)), i, sizeof(mgreg_t));
351 /* cfa = rbp + cfa_offset */
352 mono_add_unwind_op_offset (unwind_ops, code, buf, i, - cfa_offset + saved_regs_offset + (i * sizeof (mgreg_t)));
/* Save the first 8 XMM registers into the context's freg array */
354 for (i = 0; i < 8; ++i)
355 amd64_movsd_membase_reg (code, AMD64_RBP, saved_fpregs_offset + (i * sizeof(mgreg_t)), i);
357 /* Check that the stack is aligned */
358 amd64_mov_reg_reg (code, AMD64_R11, AMD64_RSP, sizeof (mgreg_t));
359 amd64_alu_reg_imm (code, X86_AND, AMD64_R11, 15);
360 amd64_alu_reg_imm (code, X86_CMP, AMD64_R11, 0);
362 amd64_branch_disp (code, X86_CC_Z, 0, FALSE);
/* Misaligned path: force a fault then report via stack_unaligned() */
364 amd64_mov_reg_imm (code, AMD64_R11, 0);
365 amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, 8);
367 amd64_mov_reg_imm (code, MONO_AMD64_ARG_REG1, tramp_type);
368 amd64_mov_reg_imm (code, AMD64_R11, stack_unaligned);
369 amd64_call_reg (code, AMD64_R11);
371 mono_amd64_patch (br [0], code);
372 //amd64_breakpoint (code);
374 if (tramp_type != MONO_TRAMPOLINE_HANDLER_BLOCK_GUARD) {
375 /* Obtain the trampoline argument which is encoded in the instruction stream */
/* AOT case: the argument lives in the GOT slot after the calling instruction */
377 /* Load the GOT offset */
378 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, tramp_offset, sizeof(gpointer));
380 * r11 points to a call *<offset>(%rip) instruction, load the
381 * pc-relative offset from the instruction itself.
383 amd64_mov_reg_membase (code, AMD64_RAX, AMD64_R11, 3, 4);
384 /* 7 is the length of the call, 8 is the offset to the next got slot */
385 amd64_alu_reg_imm_size (code, X86_ADD, AMD64_RAX, 7 + sizeof (gpointer), sizeof(gpointer));
386 /* Compute the address of the GOT slot */
387 amd64_alu_reg_reg_size (code, X86_ADD, AMD64_R11, AMD64_RAX, sizeof(gpointer));
389 amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, sizeof(gpointer));
/* JIT case: the argument is an immediate embedded after the call; its size
   is determined by inspecting the mov opcode byte at offset 5 */
391 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, tramp_offset, sizeof(gpointer));
392 amd64_mov_reg_membase (code, AMD64_RAX, AMD64_R11, 5, 1);
393 amd64_widen_reg (code, AMD64_RAX, AMD64_RAX, TRUE, FALSE);
394 amd64_alu_reg_imm_size (code, X86_CMP, AMD64_RAX, 4, 1);
396 x86_branch8 (code, X86_CC_NE, 6, FALSE);
397 /* 32 bit immediate */
398 amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 6, 4);
400 x86_jump8 (code, 10);
401 /* 64 bit immediate */
402 mono_amd64_patch (br [0], code);
403 amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 6, 8);
404 mono_amd64_patch (br [1], code);
406 amd64_mov_membase_reg (code, AMD64_RBP, arg_offset, AMD64_R11, sizeof(gpointer));
/* HANDLER_BLOCK_GUARD: the argument is the saved first argument register */
408 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, saved_regs_offset + (MONO_AMD64_ARG_REG1 * sizeof(mgreg_t)), sizeof(mgreg_t));
409 amd64_mov_membase_reg (code, AMD64_RBP, arg_offset, AMD64_R11, sizeof(gpointer));
/* Set up the MonoLMFTramp so the runtime can walk this frame */
416 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, 8, sizeof(gpointer));
418 amd64_mov_reg_imm (code, AMD64_R11, 0);
419 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rip), AMD64_R11, sizeof(mgreg_t));
421 amd64_mov_reg_reg (code, AMD64_R11, AMD64_RSP, sizeof(mgreg_t));
422 amd64_alu_reg_imm (code, X86_ADD, AMD64_R11, framesize + 16);
423 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rsp), AMD64_R11, sizeof(mgreg_t));
424 /* Save pointer to context */
425 amd64_lea_membase (code, AMD64_R11, AMD64_RBP, ctx_offset);
426 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + MONO_STRUCT_OFFSET (MonoLMFTramp, ctx), AMD64_R11, sizeof(mgreg_t));
429 code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_get_lmf_addr");
431 amd64_mov_reg_imm (code, AMD64_R11, mono_get_lmf_addr);
433 amd64_call_reg (code, AMD64_R11);
/* Link the new LMF into the per-thread LMF list */
436 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + MONO_STRUCT_OFFSET (MonoLMFTramp, lmf_addr), AMD64_RAX, sizeof(gpointer));
437 /* Save previous_lmf */
438 /* Set the lowest bit to signal that this LMF has the ip field set */
439 /* Set the third lowest bit to signal that this is a MonoLMFTramp structure */
440 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RAX, 0, sizeof(gpointer));
441 amd64_alu_reg_imm_size (code, X86_ADD, AMD64_R11, 0x5, sizeof(gpointer));
442 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_R11, sizeof(gpointer));
444 amd64_lea_membase (code, AMD64_R11, AMD64_RBP, lmf_offset);
445 amd64_mov_membase_reg (code, AMD64_RAX, 0, AMD64_R11, sizeof(gpointer));
449 /* Arg1 is the pointer to the saved registers */
450 amd64_lea_membase (code, AMD64_ARG_REG1, AMD64_RBP, saved_regs_offset);
452 /* Arg2 is the address of the calling code */
454 amd64_mov_reg_membase (code, AMD64_ARG_REG2, AMD64_RBP, 8, sizeof(gpointer));
456 amd64_mov_reg_imm (code, AMD64_ARG_REG2, 0);
458 /* Arg3 is the method/vtable ptr */
459 amd64_mov_reg_membase (code, AMD64_ARG_REG3, AMD64_RBP, arg_offset, sizeof(gpointer));
461 /* Arg4 is the trampoline address */
462 amd64_mov_reg_membase (code, AMD64_ARG_REG4, AMD64_RBP, tramp_offset, sizeof(gpointer));
465 char *icall_name = g_strdup_printf ("trampoline_func_%d", tramp_type);
466 code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, icall_name);
468 tramp = (guint8*)mono_get_trampoline_func (tramp_type);
469 amd64_mov_reg_imm (code, AMD64_R11, tramp);
471 amd64_call_reg (code, AMD64_R11);
472 amd64_mov_membase_reg (code, AMD64_RBP, res_offset, AMD64_RAX, sizeof(mgreg_t));
/* Pop the LMF: undo the 0x5 tag bits and restore *lmf_addr */
475 amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RBP, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, previous_lmf), sizeof(gpointer));
476 amd64_alu_reg_imm_size (code, X86_SUB, AMD64_RCX, 0x5, sizeof(gpointer));
477 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, lmf_offset + MONO_STRUCT_OFFSET (MonoLMFTramp, lmf_addr), sizeof(gpointer));
478 amd64_mov_membase_reg (code, AMD64_R11, 0, AMD64_RCX, sizeof(gpointer));
481 * Save rax to the stack, after the leave instruction, this will become part of
484 amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RBP, res_offset, sizeof(mgreg_t));
485 amd64_mov_membase_reg (code, AMD64_RBP, rax_offset, AMD64_RAX, sizeof(mgreg_t));
487 /* Check for thread interruption */
488 /* This is not perf critical code so no need to check the interrupt flag */
490 * Have to call the _force_ variant, since there could be a protected wrapper on the top of the stack.
493 code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_thread_force_interruption_checkpoint_noraise");
495 amd64_mov_reg_imm (code, AMD64_R11, (guint8*)mono_thread_force_interruption_checkpoint_noraise);
497 amd64_call_reg (code, AMD64_R11);
/* Non-NULL result means a pending exception object to throw in the caller */
499 amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX);
501 amd64_branch8 (code, X86_CC_Z, -1, 1);
505 * We have an exception we want to throw in the caller's frame, so pop
506 * the trampoline frame and throw from the caller.
509 /* We are in the parent frame, the exception is in rax */
511 * EH is initialized after trampolines, so get the address of the variable
512 * which contains throw_exception, and load it from there.
515 /* Not really a jit icall */
516 code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "throw_exception_addr");
518 amd64_mov_reg_imm (code, AMD64_R11, (guint8*)mono_get_throw_exception_addr ());
520 amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, sizeof(gpointer));
521 amd64_mov_reg_reg (code, AMD64_ARG_REG1, AMD64_RAX, sizeof(mgreg_t));
523 * We still have the original return value on the top of the stack, so the
524 * throw trampoline will use that as the throw site.
526 amd64_jump_reg (code, AMD64_R11);
529 mono_amd64_patch (br_ex_check, code);
531 /* Restore argument registers, r10 (imt method/rgxtx)
532 and rax (needed for direct calls to C vararg functions). */
533 for (i = 0; i < AMD64_NREG; ++i)
534 if (AMD64_IS_ARGUMENT_REG (i) || i == AMD64_R10 || i == AMD64_RAX)
535 amd64_mov_reg_membase (code, i, AMD64_RBP, saved_regs_offset + (i * sizeof(mgreg_t)), sizeof(mgreg_t));
536 for (i = 0; i < 8; ++i)
537 amd64_movsd_reg_membase (code, i, AMD64_RBP, saved_fpregs_offset + (i * sizeof(mgreg_t)));
541 cfa_offset -= sizeof (mgreg_t);
542 mono_add_unwind_op_def_cfa (unwind_ops, code, buf, AMD64_RSP, cfa_offset);
544 if (MONO_TRAMPOLINE_TYPE_MUST_RETURN (tramp_type)) {
/* Return trampoline's result in RAX to the caller */
546 amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, rax_offset - sizeof(mgreg_t), sizeof(mgreg_t));
549 /* call the compiled method using the saved rax */
550 amd64_jump_membase (code, AMD64_RSP, rax_offset - sizeof(mgreg_t));
553 g_assert ((code - buf) <= kMaxCodeSize);
555 mono_arch_flush_icache (buf, code - buf);
556 mono_profiler_code_buffer_new (buf, code - buf, MONO_PROFILER_CODE_BUFFER_HELPER, NULL);
558 tramp_name = mono_get_generic_trampoline_name (tramp_type);
559 *info = mono_tramp_info_create (tramp_name, buf, code - buf, ji, unwind_ops);
/* Emit a small per-argument trampoline: call the generic trampoline for
   TRAMP_TYPE, with ARG1 embedded in the instruction stream right after the
   call so the generic trampoline can decode it. */
566 mono_arch_create_specific_trampoline (gpointer arg1, MonoTrampolineType tramp_type, MonoDomain *domain, guint32 *code_len)
568 guint8 *code, *buf, *tramp;
570 gboolean far_addr = FALSE;
572 tramp = mono_get_trampoline_code (tramp_type);
/* Encoding size depends on whether ARG1 fits in 32 bits.
   NOTE(review): the branches assigning 'size' appear elided from this view. */
574 if ((((guint64)arg1) >> 32) == 0)
579 code = buf = (guint8 *)mono_domain_code_reserve_align (domain, size, 1);
/* If the generic trampoline is out of rel32 range, fall back to an indirect call */
581 if (((gint64)tramp - (gint64)code) >> 31 != 0 && ((gint64)tramp - (gint64)code) >> 31 != -1) {
582 #ifndef MONO_ARCH_NOMAP32BIT
583 g_assert_not_reached ();
587 code = buf = (guint8 *)mono_domain_code_reserve_align (domain, size, 1);
591 amd64_mov_reg_imm (code, AMD64_R11, tramp);
592 amd64_call_reg (code, AMD64_R11);
594 amd64_call_code (code, tramp);
596 /* The trampoline code will obtain the argument from the instruction stream */
/* 'code + 1' skips the mov opcode byte that the generic trampoline inspects */
597 if ((((guint64)arg1) >> 32) == 0) {
599 *(guint32*)(code + 1) = (gint64)arg1;
603 *(guint64*)(code + 1) = (gint64)arg1;
607 g_assert ((code - buf) <= size);
612 mono_arch_flush_icache (buf, size);
613 mono_profiler_code_buffer_new (buf, code - buf, MONO_PROFILER_CODE_BUFFER_SPECIFIC_TRAMPOLINE, mono_get_generic_trampoline_simple_name (tramp_type));
/* Emit the lazy RGCTX slot fetch trampoline for SLOT: walk the rgctx array
   chain to the required depth, return the slot value if already initialized,
   otherwise fall through to the slow-path specific trampoline. */
619 mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info, gboolean aot)
623 guint8 **rgctx_null_jumps;
628 MonoJumpInfo *ji = NULL;
631 mrgctx = MONO_RGCTX_SLOT_IS_MRGCTX (slot);
632 index = MONO_RGCTX_SLOT_INDEX (slot);
/* Method rgctx: the first array starts after the MRGCTX header */
634 index += MONO_SIZEOF_METHOD_RUNTIME_GENERIC_CONTEXT / sizeof (gpointer);
/* Determine how many array hops are needed to reach INDEX */
635 for (depth = 0; ; ++depth) {
636 int size = mono_class_rgctx_get_array_size (depth, mrgctx);
638 if (index < size - 1)
643 tramp_size = 64 + 8 * depth;
645 code = buf = (guint8 *)mono_global_codeman_reserve (tramp_size);
647 unwind_ops = mono_arch_get_cie_program ();
/* One null-check branch per hop, plus the head and final slot checks */
649 rgctx_null_jumps = (guint8 **)g_malloc (sizeof (guint8*) * (depth + 2));
653 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
655 /* load rgctx ptr from vtable */
656 amd64_mov_reg_membase (code, AMD64_RAX, AMD64_ARG_REG1, MONO_STRUCT_OFFSET (MonoVTable, runtime_generic_context), sizeof(gpointer));
657 /* is the rgctx ptr null? */
658 amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX);
659 /* if yes, jump to actual trampoline */
660 rgctx_null_jumps [0] = code;
661 amd64_branch8 (code, X86_CC_Z, -1, 1);
664 for (i = 0; i < depth; ++i) {
665 /* load ptr to next array */
666 if (mrgctx && i == 0)
667 amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RAX, MONO_SIZEOF_METHOD_RUNTIME_GENERIC_CONTEXT, sizeof(gpointer));
669 amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RAX, 0, sizeof(gpointer));
670 /* is the ptr null? */
671 amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX);
672 /* if yes, jump to actual trampoline */
673 rgctx_null_jumps [i + 1] = code;
674 amd64_branch8 (code, X86_CC_Z, -1, 1);
/* Fetch the slot value; +1 skips the array's leading "next" pointer */
678 amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RAX, sizeof (gpointer) * (index + 1), sizeof(gpointer));
679 /* is the slot null? */
680 amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX);
681 /* if yes, jump to actual trampoline */
682 rgctx_null_jumps [depth + 1] = code;
683 amd64_branch8 (code, X86_CC_Z, -1, 1);
684 /* otherwise return */
/* All null-check branches land here, on the slow path */
687 for (i = mrgctx ? 1 : 0; i <= depth + 1; ++i)
688 mono_amd64_patch (rgctx_null_jumps [i], code);
690 g_free (rgctx_null_jumps);
692 /* move the rgctx pointer to the VTABLE register */
693 amd64_mov_reg_reg (code, MONO_ARCH_VTABLE_REG, AMD64_ARG_REG1, sizeof(gpointer));
696 code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, g_strdup_printf ("specific_trampoline_lazy_fetch_%u", slot));
697 amd64_jump_reg (code, AMD64_R11);
699 tramp = (guint8 *)mono_arch_create_specific_trampoline (GUINT_TO_POINTER (slot), MONO_TRAMPOLINE_RGCTX_LAZY_FETCH, mono_get_root_domain (), NULL);
701 /* jump to the actual trampoline */
702 amd64_jump_code (code, tramp);
705 mono_arch_flush_icache (buf, code - buf);
706 mono_profiler_code_buffer_new (buf, code - buf, MONO_PROFILER_CODE_BUFFER_GENERICS_TRAMPOLINE, NULL);
708 g_assert (code - buf <= tramp_size);
710 char *name = mono_get_rgctx_fetch_trampoline_name (slot);
711 *info = mono_tramp_info_create (name, buf, code - buf, ji, unwind_ops);
/* Emit the generic (slot-independent) rgctx fetch trampoline used by AOT:
   currently just dispatches to the trampoline address packed alongside the
   slot in the RGCTX argument register. */
718 mono_arch_create_general_rgctx_lazy_fetch_trampoline (MonoTrampInfo **info, gboolean aot)
722 MonoJumpInfo *ji = NULL;
728 code = buf = (guint8 *)mono_global_codeman_reserve (tramp_size);
730 unwind_ops = mono_arch_get_cie_program ();
732 // FIXME: Currently, we always go to the slow path.
733 /* This receives a <slot, trampoline> in the rgctx arg reg. */
734 /* Load trampoline addr */
735 amd64_mov_reg_membase (code, AMD64_R11, MONO_ARCH_RGCTX_REG, 8, 8);
736 /* move the rgctx pointer to the VTABLE register */
737 amd64_mov_reg_reg (code, MONO_ARCH_VTABLE_REG, AMD64_ARG_REG1, sizeof(gpointer));
738 /* Jump to the trampoline */
739 amd64_jump_reg (code, AMD64_R11);
741 mono_arch_flush_icache (buf, code - buf);
742 mono_profiler_code_buffer_new (buf, code - buf, MONO_PROFILER_CODE_BUFFER_GENERICS_TRAMPOLINE, NULL);
744 g_assert (code - buf <= tramp_size);
747 *info = mono_tramp_info_create ("rgctx_fetch_trampoline_general", buf, code - buf, ji, unwind_ops);
/* Overwrite the start of a compiled method so that executing it calls
   FUNC (FUNC_ARG) instead — used to invalidate stale JITted code. */
753 mono_arch_invalidate_method (MonoJitInfo *ji, void *func, gpointer func_arg)
755 /* FIXME: This is not thread safe */
756 guint8 *code = (guint8 *)ji->code_start;
758 amd64_mov_reg_imm (code, AMD64_ARG_REG1, func_arg);
759 amd64_mov_reg_imm (code, AMD64_R11, func);
761 x86_push_imm (code, (guint64)func_arg);
762 amd64_call_reg (code, AMD64_R11);
764 #endif /* !DISABLE_JIT */
/* Return the saved handler-block return address from the current thread's
   JIT TLS data; called from the handler block trampoline below. */
767 mono_amd64_handler_block_trampoline_helper (void)
769 MonoJitTlsData *jit_tls = (MonoJitTlsData *)mono_tls_get_jit_tls ();
770 return jit_tls->handler_block_return_address;
/* Emit the handler-block guard trampoline: recovers the real return address
   via the TLS helper, runs the HANDLER_BLOCK_GUARD trampoline function, then
   resumes at the recovered address. */
775 mono_arch_create_handler_block_trampoline (MonoTrampInfo **info, gboolean aot)
779 MonoJumpInfo *ji = NULL;
782 code = buf = (guint8 *)mono_global_codeman_reserve (tramp_size);
784 unwind_ops = mono_arch_get_cie_program ();
787 * This trampoline restores the call chain of the handler block then jumps into the code that deals with it.
788 * We get here from the ret emitted by CEE_ENDFINALLY.
789 * The stack is misaligned.
791 /* Align the stack before the call to mono_amd64_handler_block_trampoline_helper() */
793 /* Also make room for the "register parameter stack area" as specified by the Windows x64 ABI (4 64-bit registers) */
794 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8 + 4 * 8);
796 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
/* AOT path loads the helper through the GOT; JIT path embeds its address */
799 code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_amd64_handler_block_trampoline_helper");
800 amd64_call_reg (code, AMD64_R11);
802 amd64_mov_reg_imm (code, AMD64_RAX, mono_amd64_handler_block_trampoline_helper);
803 amd64_call_reg (code, AMD64_RAX);
805 /* Undo stack alignment */
807 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8 + 4 * 8);
809 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
811 /* Save the result to the stack */
812 amd64_push_reg (code, AMD64_RAX);
814 /* Make room for the "register parameter stack area" as specified by the Windows x64 ABI (4 64-bit registers) */
815 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 4 * 8);
818 char *name = g_strdup_printf ("trampoline_func_%d", MONO_TRAMPOLINE_HANDLER_BLOCK_GUARD);
819 code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, name);
820 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_R11, 8);
822 amd64_mov_reg_imm (code, AMD64_RAX, mono_get_trampoline_func (MONO_TRAMPOLINE_HANDLER_BLOCK_GUARD));
824 /* The stack is aligned */
825 amd64_call_reg (code, AMD64_RAX);
827 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 4 * 8);
829 /* Load return address */
830 amd64_pop_reg (code, AMD64_RAX);
831 /* The stack is misaligned, that's what the code we branch to expects */
832 amd64_jump_reg (code, AMD64_RAX);
834 mono_arch_flush_icache (buf, code - buf);
835 mono_profiler_code_buffer_new (buf, code - buf, MONO_PROFILER_CODE_BUFFER_HELPER, NULL);
836 g_assert (code - buf <= tramp_size);
838 *info = mono_tramp_info_create ("handler_block_trampoline", buf, code - buf, ji, unwind_ops);
842 #endif /* !DISABLE_JIT */
845 * mono_arch_get_call_target:
847 * Return the address called by the code before CODE if exists.
850 mono_arch_get_call_target (guint8 *code)
/* Only the direct "call rel32" (0xe8) form is recognized here */
852 if (code [-5] == 0xe8) {
853 gint32 disp = *(gint32*)(code - 4);
/* rel32 is relative to the address following the call, i.e. CODE itself */
854 guint8 *target = code + disp;
863 * mono_arch_get_plt_info_offset:
865 * Return the PLT info offset belonging to the plt entry PLT_ENTRY.
868 mono_arch_get_plt_info_offset (guint8 *plt_entry, mgreg_t *regs, guint8 *code)
/* The info offset is stored immediately after the 6-byte jmp *<disp>(%rip) */
870 return *(guint32*)(plt_entry + 6);
875 * mono_arch_create_sdb_trampoline:
877 * Return a trampoline which captures the current context, passes it to
878 * debugger_agent_single_step_from_context ()/debugger_agent_breakpoint_from_context (),
879 * then restores the (potentially changed) context.
/* Emit the SDB (soft debugger) trampoline: capture the caller's state into a
   MonoContext on the stack, hand it to the debugger agent's single-step or
   breakpoint callback, then restore the (possibly modified) context.
   Fix: flush the icache over the EMITTED buffer (buf, code - buf) — the
   previous (code, code - buf) flushed a range starting at the END pointer,
   inconsistent with every other emitter in this file. */
882 mono_arch_create_sdb_trampoline (gboolean single_step, MonoTrampInfo **info, gboolean aot)
884 int tramp_size = 512;
885 int i, framesize, ctx_offset, cfa_offset, gregs_offset;
887 GSList *unwind_ops = NULL;
888 MonoJumpInfo *ji = NULL;
890 code = buf = (guint8 *)mono_global_codeman_reserve (tramp_size);
894 /* Reserve space where the callee can save the argument registers */
895 framesize += 4 * sizeof (mgreg_t);
898 ctx_offset = framesize;
899 framesize += sizeof (MonoContext);
901 framesize = ALIGN_TO (framesize, MONO_ARCH_FRAME_ALIGNMENT);
905 mono_add_unwind_op_def_cfa (unwind_ops, code, buf, AMD64_RSP, 8);
906 // IP saved at CFA - 8
907 mono_add_unwind_op_offset (unwind_ops, code, buf, AMD64_RIP, -cfa_offset);
/* Standard frame: push rbp; mov rbp, rsp; sub rsp, framesize */
909 amd64_push_reg (code, AMD64_RBP);
910 cfa_offset += sizeof(mgreg_t);
911 mono_add_unwind_op_def_cfa_offset (unwind_ops, code, buf, cfa_offset);
912 mono_add_unwind_op_offset (unwind_ops, code, buf, AMD64_RBP, - cfa_offset);
914 amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof(mgreg_t));
915 mono_add_unwind_op_def_cfa_reg (unwind_ops, code, buf, AMD64_RBP);
916 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, framesize);
918 gregs_offset = ctx_offset + MONO_STRUCT_OFFSET (MonoContext, gregs);
920 /* Initialize a MonoContext structure on the stack */
921 for (i = 0; i < AMD64_NREG; ++i) {
922 if (i != AMD64_RIP && i != AMD64_RSP && i != AMD64_RBP)
923 amd64_mov_membase_reg (code, AMD64_RSP, gregs_offset + (i * sizeof (mgreg_t)), i, sizeof (mgreg_t));
/* RBP/RSP/RIP must be reconstructed as the CALLER saw them, from the frame */
925 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, 0, sizeof (mgreg_t));
926 amd64_mov_membase_reg (code, AMD64_RSP, gregs_offset + (AMD64_RBP * sizeof (mgreg_t)), AMD64_R11, sizeof (mgreg_t));
927 amd64_lea_membase (code, AMD64_R11, AMD64_RBP, 2 * sizeof (mgreg_t));
928 amd64_mov_membase_reg (code, AMD64_RSP, gregs_offset + (AMD64_RSP * sizeof (mgreg_t)), AMD64_R11, sizeof (mgreg_t));
929 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, sizeof (mgreg_t), sizeof (mgreg_t));
930 amd64_mov_membase_reg (code, AMD64_RSP, gregs_offset + (AMD64_RIP * sizeof (mgreg_t)), AMD64_R11, sizeof (mgreg_t));
932 /* Call the single step/breakpoint function in sdb */
933 amd64_lea_membase (code, AMD64_ARG_REG1, AMD64_RSP, ctx_offset);
/* AOT loads the callback through the GOT; JIT embeds its address directly */
937 code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "debugger_agent_single_step_from_context");
939 code = mono_arch_emit_load_aotconst (buf, code, &ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "debugger_agent_breakpoint_from_context");
942 amd64_mov_reg_imm (code, AMD64_R11, debugger_agent_single_step_from_context);
944 amd64_mov_reg_imm (code, AMD64_R11, debugger_agent_breakpoint_from_context);
946 amd64_call_reg (code, AMD64_R11);
948 /* Restore registers from ctx */
949 for (i = 0; i < AMD64_NREG; ++i) {
950 if (i != AMD64_RIP && i != AMD64_RSP && i != AMD64_RBP)
951 amd64_mov_reg_membase (code, i, AMD64_RSP, gregs_offset + (i * sizeof (mgreg_t)), sizeof (mgreg_t));
/* Write the (possibly debugger-modified) RBP/RIP back into the frame slots */
953 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RSP, gregs_offset + (AMD64_RBP * sizeof (mgreg_t)), sizeof (mgreg_t));
954 amd64_mov_membase_reg (code, AMD64_RBP, 0, AMD64_R11, sizeof (mgreg_t));
955 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RSP, gregs_offset + (AMD64_RIP * sizeof (mgreg_t)), sizeof (mgreg_t));
956 amd64_mov_membase_reg (code, AMD64_RBP, sizeof (mgreg_t), AMD64_R11, sizeof (mgreg_t));
959 cfa_offset -= sizeof (mgreg_t);
960 mono_add_unwind_op_def_cfa (unwind_ops, code, buf, AMD64_RSP, cfa_offset);
963 mono_arch_flush_icache (buf, code - buf);
964 mono_profiler_code_buffer_new (buf, code - buf, MONO_PROFILER_CODE_BUFFER_HELPER, NULL);
965 g_assert (code - buf <= tramp_size);
967 const char *tramp_name = single_step ? "sdb_single_step_trampoline" : "sdb_breakpoint_trampoline";
968 *info = mono_tramp_info_create (tramp_name, buf, code - buf, ji, unwind_ops);
974 #ifdef ENABLE_INTERPRETER
976 * mono_arch_get_enter_icall_trampoline:
978 * A trampoline that handles the transition from interpreter into native world.
979 * It requires to set up a descriptor (MethodArguments) that describes the
980 * required arguments passed to the callee.
/* The emitted code receives the native target address in ARG_REG1 and a
 * MethodArguments* in ARG_REG2, loads up to six integer argument registers
 * from the descriptor's iregs array, calls the target, and stores RAX
 * through the descriptor's retval pointer when that pointer is non-NULL. */
983 mono_arch_get_enter_icall_trampoline (MonoTrampInfo **info)
/* 'exits' collects the forward branch sites out of the arg-loading loop so
 * they can all be patched to the common exit point emitted after the loop. */
985 const int gregs_num = 6;
986 guint8 *start = NULL, *code, *exits [gregs_num], *leave_tramp;
987 MonoJumpInfo *ji = NULL;
988 GSList *unwind_ops = NULL;
/* Integer argument registers in SysV AMD64 order: rdi, rsi, rdx, rcx, r8, r9. */
989 static int arg_regs[] = {AMD64_ARG_REG1, AMD64_ARG_REG2, AMD64_ARG_REG3, AMD64_ARG_REG4, AMD64_R8, AMD64_R9};
992 start = code = (guint8 *) mono_global_codeman_reserve (256);
994 /* save MethodArguments* onto stack */
995 amd64_push_reg (code, AMD64_ARG_REG2);
997 /* save target address on stack */
998 amd64_push_reg (code, AMD64_ARG_REG1);
999 amd64_push_reg (code, AMD64_RAX);
1001 /* load pointer to MethodArguments* into R11 */
1002 amd64_mov_reg_reg (code, AMD64_R11, AMD64_ARG_REG2, 8);
1004 /* TODO: do float stuff first */
1006 /* move ilen into RAX */ // TODO: struct offset
1007 amd64_mov_reg_membase (code, AMD64_RAX, AMD64_R11, 0, 8);
1008 /* load pointer to iregs into R11 */ // TODO: struct offset
1009 amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 8, 8);
/* Emit one register load per potential integer argument; RAX counts the
 * remaining arguments, and a zero count branches forward to the exit
 * (the branch site is presumably recorded into exits [i] on a nearby
 * elided line — confirm against the full source). */
1011 for (i = 0; i < gregs_num; i++) {
1012 amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX);
1014 x86_branch8 (code, X86_CC_Z, 0, FALSE);
1021 amd64_mov_reg_membase (code, arg_regs [i], AMD64_R11, i * sizeof (gpointer), 8);
/* Stack-argument spill path for arguments beyond the six registers;
 * explicitly flagged as untested by the author. */
1023 g_error ("not tested yet.");
1024 amd64_push_reg (code, AMD64_RAX);
1025 amd64_mov_reg_membase (code, AMD64_RAX, AMD64_R11, i * sizeof (gpointer), 8);
1026 amd64_mov_membase_reg (code, AMD64_RBP, offset, AMD64_RAX, sizeof (gpointer));
1027 offset += sizeof (gpointer);
1028 amd64_pop_reg (code, AMD64_RAX);
/* One argument consumed. */
1030 amd64_dec_reg_size (code, AMD64_RAX, 1);
/* Patch every early-exit branch to land here, after all register loads. */
1033 for (i = 0; i < gregs_num; i++) {
1034 x86_patch (exits [i], code);
/* Restore scratch RAX and the saved target address (pushed above). */
1038 amd64_pop_reg (code, AMD64_RAX);
1039 amd64_pop_reg (code, AMD64_R11);
1041 /* call into native function */
1042 amd64_call_reg (code, AMD64_R11);
1044 /* load MethodArguments */
1045 amd64_pop_reg (code, AMD64_R11);
1046 /* load retval */ // TODO: struct offset
1047 amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0x20, 8);
/* Skip the store when the descriptor has no retval slot (NULL pointer);
 * the forward branch is patched below via leave_tramp. */
1049 amd64_test_reg_reg (code, AMD64_R11, AMD64_R11);
1051 x86_branch8 (code, X86_CC_Z, 0, FALSE);
1053 amd64_mov_membase_reg (code, AMD64_R11, 0, AMD64_RAX, 8);
1055 x86_patch (leave_tramp, code);
1059 mono_arch_flush_icache (start, code - start);
/* NOTE(review): EXCEPTION_HANDLING looks like a copy-paste for this buffer's
 * profiler category — a helper/trampoline category seems more accurate; verify. */
1060 mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_EXCEPTION_HANDLING, NULL);
1063 *info = mono_tramp_info_create ("enter_icall_trampoline", start, code - start, ji, unwind_ops);
1068 #endif /* !DISABLE_JIT */
/* DISABLE_JIT stub: unbox trampolines cannot be generated without the JIT;
 * reaching this is a runtime configuration error. */
1072 mono_arch_get_unbox_trampoline (MonoMethod *m, gpointer addr)
1074 g_assert_not_reached ();
/* DISABLE_JIT stub: static rgctx trampolines are unavailable without the JIT. */
1079 mono_arch_get_static_rgctx_trampoline (MonoMethod *m, MonoMethodRuntimeGenericContext *mrgctx, gpointer addr)
1081 g_assert_not_reached ();
/* DISABLE_JIT stub: rgctx lazy-fetch trampolines are unavailable without the JIT. */
1086 mono_arch_create_rgctx_lazy_fetch_trampoline (guint32 slot, MonoTrampInfo **info, gboolean aot)
1088 g_assert_not_reached ();
/* DISABLE_JIT stub: generic trampolines are unavailable without the JIT. */
1093 mono_arch_create_generic_trampoline (MonoTrampolineType tramp_type, MonoTrampInfo **info, gboolean aot)
1095 g_assert_not_reached ();
/* DISABLE_JIT stub: specific trampolines are unavailable without the JIT. */
1100 mono_arch_create_specific_trampoline (gpointer arg1, MonoTrampolineType tramp_type, MonoDomain *domain, guint32 *code_len)
1102 g_assert_not_reached ();
/* DISABLE_JIT stub: general rgctx lazy-fetch trampoline is unavailable without the JIT. */
1107 mono_arch_create_general_rgctx_lazy_fetch_trampoline (MonoTrampInfo **info, gboolean aot)
1109 g_assert_not_reached ();
/* DISABLE_JIT stub: handler-block trampoline is unavailable without the JIT. */
1114 mono_arch_create_handler_block_trampoline (MonoTrampInfo **info, gboolean aot)
1116 g_assert_not_reached ();
/* DISABLE_JIT stub: there is no JIT-compiled method code to invalidate. */
1121 mono_arch_invalidate_method (MonoJitInfo *ji, void *func, gpointer func_arg)
1123 g_assert_not_reached ();
/* DISABLE_JIT stub: sdb (soft-debugger) trampolines are unavailable without the JIT. */
1128 mono_arch_create_sdb_trampoline (gboolean single_step, MonoTrampInfo **info, gboolean aot)
1130 g_assert_not_reached ();
1134 #ifdef ENABLE_INTERPRETER
/* DISABLE_JIT stub: the interpreter's enter-icall trampoline likewise cannot
 * be generated without the JIT's code emitter. */
1136 mono_arch_get_enter_icall_trampoline (MonoTrampInfo **info)
1138 g_assert_not_reached ();
1142 #endif /* DISABLE_JIT */