Merge pull request #1949 from lewurm/fixtype
[mono.git] / mono / mini / mini-amd64.c
index f0ca62d8d97a358f1e46b44e832fc73b5abcffb9..9e7412b794f0b116a45438ce5532773650a7cc99 100644 (file)
@@ -65,27 +65,12 @@ static gboolean optimize_for_xen = TRUE;
 #define mono_mini_arch_unlock() mono_mutex_unlock (&mini_arch_mutex)
 static mono_mutex_t mini_arch_mutex;
 
-/*
- * The code generated for sequence points reads from this location, which is
- * made read-only when single stepping is enabled.
- */
-static gpointer ss_trigger_page;
-
-/* Enabled breakpoints read from this trigger page */
-static gpointer bp_trigger_page;
-
-/* The size of the breakpoint sequence */
-static int breakpoint_size;
-
-/* The size of the breakpoint instruction causing the actual fault */
-static int breakpoint_fault_size;
-
-/* The size of the single step instruction causing the actual fault */
-static int single_step_fault_size;
-
 /* The single step trampoline */
 static gpointer ss_trampoline;
 
+/* The breakpoint trampoline */
+static gpointer bp_trampoline;
+
 /* Offset between fp and the first argument in the callee */
 #define ARGS_OFFSET 16
 #define GP_SCRATCH_REG AMD64_R11
@@ -501,6 +486,9 @@ typedef enum {
        ArgOnStack,
        ArgValuetypeInReg,
        ArgValuetypeAddrInIReg,
+       /* gsharedvt argument passed by addr */
+       ArgGSharedVtInReg,
+       ArgGSharedVtOnStack,
        ArgNone /* only in pair_storage */
 } ArgStorage;
 
@@ -523,7 +511,6 @@ typedef struct {
        guint32 reg_usage;
        guint32 freg_usage;
        gboolean need_stack_align;
-       gboolean vtype_retaddr;
        /* The index of the vret arg in the argument list */
        int vret_arg_index;
        ArgInfo ret;
@@ -801,7 +788,7 @@ add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
                /* Always pass in memory */
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (size, 8);
-               ainfo->storage = ArgOnStack;
+               ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
 
                g_free (fields);
                return;
@@ -848,7 +835,7 @@ add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
                if (info->native_size > 16) {
                        ainfo->offset = *stack_size;
                        *stack_size += ALIGN_TO (info->native_size, 8);
-                       ainfo->storage = ArgOnStack;
+                       ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
 
                        g_free (fields);
                        return;
@@ -859,7 +846,7 @@ add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
                        break;
                default:
                        if (is_return) {
-                               ainfo->storage = ArgOnStack;
+                               ainfo->storage = ArgValuetypeAddrInIReg;
                                ainfo->offset = *stack_size;
                                *stack_size += ALIGN_TO (info->native_size, 8);
                        }
@@ -985,7 +972,7 @@ add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
                                *stack_size += ALIGN_TO (info->native_size, 8);
                        else
                                *stack_size += nquads * sizeof(mgreg_t);
-                       ainfo->storage = ArgOnStack;
+                       ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
                }
        }
 }
@@ -1062,6 +1049,10 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
                        cinfo->ret.reg = AMD64_RAX;
                        break;
                }
+               if (mini_is_gsharedvt_type (ret_type)) {
+                       cinfo->ret.storage = ArgValuetypeAddrInIReg;
+                       break;
+               }
                /* fall through */
 #if defined( __native_client_codegen__ )
        case MONO_TYPE_TYPEDBYREF:
@@ -1070,18 +1061,20 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
                guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
 
                add_valuetype (sig, &cinfo->ret, ret_type, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
-               if (cinfo->ret.storage == ArgOnStack) {
-                       cinfo->vtype_retaddr = TRUE;
-                       /* The caller passes the address where the value is stored */
-               }
+               g_assert (cinfo->ret.storage != ArgInIReg);
                break;
        }
 #if !defined( __native_client_codegen__ )
        case MONO_TYPE_TYPEDBYREF:
                /* Same as a valuetype with size 24 */
-               cinfo->vtype_retaddr = TRUE;
+               cinfo->ret.storage = ArgValuetypeAddrInIReg;
                break;
 #endif
+       case MONO_TYPE_VAR:
+       case MONO_TYPE_MVAR:
+               g_assert (mini_is_gsharedvt_type (ret_type));
+               cinfo->ret.storage = ArgValuetypeAddrInIReg;
+               break;
        case MONO_TYPE_VOID:
                break;
        default:
@@ -1096,7 +1089,7 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
         * are sometimes made using calli without sig->hasthis set, like in the delegate
         * invoke wrappers.
         */
-       if (cinfo->vtype_retaddr && !is_pinvoke && (sig->hasthis || (sig->param_count > 0 && MONO_TYPE_IS_REFERENCE (mini_get_underlying_type (sig->params [0]))))) {
+       if (cinfo->ret.storage == ArgValuetypeAddrInIReg && !is_pinvoke && (sig->hasthis || (sig->param_count > 0 && MONO_TYPE_IS_REFERENCE (mini_get_underlying_type (sig->params [0]))))) {
                if (sig->hasthis) {
                        add_general (&gr, &stack_size, cinfo->args + 0);
                } else {
@@ -1104,14 +1097,17 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
                        pstart = 1;
                }
                add_general (&gr, &stack_size, &cinfo->ret);
+               cinfo->ret.storage = ArgValuetypeAddrInIReg;
                cinfo->vret_arg_index = 1;
        } else {
                /* this */
                if (sig->hasthis)
                        add_general (&gr, &stack_size, cinfo->args + 0);
 
-               if (cinfo->vtype_retaddr)
+               if (cinfo->ret.storage == ArgValuetypeAddrInIReg) {
                        add_general (&gr, &stack_size, &cinfo->ret);
+                       cinfo->ret.storage = ArgValuetypeAddrInIReg;
+               }
        }
 
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
@@ -1177,6 +1173,15 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
                                add_general (&gr, &stack_size, ainfo);
                                break;
                        }
+                       if (mini_is_gsharedvt_type (ptype)) {
+                               /* gsharedvt arguments are passed by ref */
+                               add_general (&gr, &stack_size, ainfo);
+                               if (ainfo->storage == ArgInIReg)
+                                       ainfo->storage = ArgGSharedVtInReg;
+                               else
+                                       ainfo->storage = ArgGSharedVtOnStack;
+                               break;
+                       }
                        /* fall through */
                case MONO_TYPE_VALUETYPE:
                case MONO_TYPE_TYPEDBYREF:
@@ -1193,6 +1198,16 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
                case MONO_TYPE_R8:
                        add_float (&fr, &stack_size, ainfo, TRUE);
                        break;
+               case MONO_TYPE_VAR:
+               case MONO_TYPE_MVAR:
+                       /* gsharedvt arguments are passed by ref */
+                       g_assert (mini_is_gsharedvt_type (ptype));
+                       add_general (&gr, &stack_size, ainfo);
+                       if (ainfo->storage == ArgInIReg)
+                               ainfo->storage = ArgGSharedVtInReg;
+                       else
+                               ainfo->storage = ArgGSharedVtOnStack;
+                       break;
                default:
                        g_assert_not_reached ();
                }
@@ -1295,8 +1310,6 @@ mono_arch_cpu_init (void)
 void
 mono_arch_init (void)
 {
-       int flags;
-
        mono_mutex_init_recursive (&mini_arch_mutex);
 #if defined(__native_client_codegen__)
        mono_native_tls_alloc (&nacl_instruction_depth, NULL);
@@ -1305,29 +1318,13 @@ mono_arch_init (void)
        mono_native_tls_alloc (&nacl_legacy_prefix_tag, NULL);
 #endif
 
-#ifdef MONO_ARCH_NOMAP32BIT
-       flags = MONO_MMAP_READ;
-       /* amd64_mov_reg_imm () + amd64_mov_reg_membase () */
-       breakpoint_size = 13;
-       breakpoint_fault_size = 3;
-#else
-       flags = MONO_MMAP_READ|MONO_MMAP_32BIT;
-       /* amd64_mov_reg_mem () */
-       breakpoint_size = 8;
-       breakpoint_fault_size = 8;
-#endif
-
-       /* amd64_alu_membase_imm_size (code, X86_CMP, AMD64_R11, 0, 0, 4); */
-       single_step_fault_size = 4;
-
-       ss_trigger_page = mono_valloc (NULL, mono_pagesize (), flags);
-       bp_trigger_page = mono_valloc (NULL, mono_pagesize (), flags);
-       mono_mprotect (bp_trigger_page, mono_pagesize (), 0);
-
        mono_aot_register_jit_icall ("mono_amd64_throw_exception", mono_amd64_throw_exception);
        mono_aot_register_jit_icall ("mono_amd64_throw_corlib_exception", mono_amd64_throw_corlib_exception);
        mono_aot_register_jit_icall ("mono_amd64_resume_unwind", mono_amd64_resume_unwind);
        mono_aot_register_jit_icall ("mono_amd64_get_original_ip", mono_amd64_get_original_ip);
+
+       if (!mono_aot_only)
+               bp_trampoline = mini_get_breakpoint_trampoline ();
 }
 
 /*
@@ -1654,7 +1651,7 @@ mono_arch_fill_argument_info (MonoCompile *cfg)
                case ArgInIReg:
                case ArgInFloatSSEReg:
                case ArgInDoubleSSEReg:
-                       if ((MONO_TYPE_ISSTRUCT (sig_ret) && !mono_class_from_mono_type (sig_ret)->enumtype) || ((sig_ret->type == MONO_TYPE_TYPEDBYREF) && cinfo->vtype_retaddr)) {
+                       if ((MONO_TYPE_ISSTRUCT (sig_ret) && !mono_class_from_mono_type (sig_ret)->enumtype) || ((sig_ret->type == MONO_TYPE_TYPEDBYREF) && cinfo->ret.storage == ArgValuetypeAddrInIReg)) {
                                cfg->vret_addr->opcode = OP_REGVAR;
                                cfg->vret_addr->inst_c0 = cinfo->ret.reg;
                        }
@@ -1764,25 +1761,23 @@ mono_arch_allocate_vars (MonoCompile *cfg)
                case ArgInIReg:
                case ArgInFloatSSEReg:
                case ArgInDoubleSSEReg:
-                       if ((MONO_TYPE_ISSTRUCT (sig_ret) && !mono_class_from_mono_type (sig_ret)->enumtype) || ((sig_ret->type == MONO_TYPE_TYPEDBYREF) && cinfo->vtype_retaddr)) {
-                               /* The register is volatile */
-                               cfg->vret_addr->opcode = OP_REGOFFSET;
-                               cfg->vret_addr->inst_basereg = cfg->frame_reg;
-                               if (cfg->arch.omit_fp) {
-                                       cfg->vret_addr->inst_offset = offset;
-                                       offset += 8;
-                               } else {
-                                       offset += 8;
-                                       cfg->vret_addr->inst_offset = -offset;
-                               }
-                               if (G_UNLIKELY (cfg->verbose_level > 1)) {
-                                       printf ("vret_addr =");
-                                       mono_print_ins (cfg->vret_addr);
-                               }
+                       cfg->ret->opcode = OP_REGVAR;
+                       cfg->ret->inst_c0 = cinfo->ret.reg;
+                       break;
+               case ArgValuetypeAddrInIReg:
+                       /* The register is volatile */
+                       cfg->vret_addr->opcode = OP_REGOFFSET;
+                       cfg->vret_addr->inst_basereg = cfg->frame_reg;
+                       if (cfg->arch.omit_fp) {
+                               cfg->vret_addr->inst_offset = offset;
+                               offset += 8;
+                       } else {
+                               offset += 8;
+                               cfg->vret_addr->inst_offset = -offset;
                        }
-                       else {
-                               cfg->ret->opcode = OP_REGVAR;
-                               cfg->ret->inst_c0 = cinfo->ret.reg;
+                       if (G_UNLIKELY (cfg->verbose_level > 1)) {
+                               printf ("vret_addr =");
+                               mono_print_ins (cfg->vret_addr);
                        }
                        break;
                case ArgValuetypeInReg:
@@ -1860,7 +1855,7 @@ mono_arch_allocate_vars (MonoCompile *cfg)
                         * are volatile across calls.
                         * FIXME: Optimize this.
                         */
-                       if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg) || (ainfo->storage == ArgValuetypeInReg))
+                       if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg) || (ainfo->storage == ArgValuetypeInReg) || (ainfo->storage == ArgGSharedVtInReg))
                                inreg = FALSE;
 
                        ins->opcode = OP_REGOFFSET;
@@ -1869,12 +1864,14 @@ mono_arch_allocate_vars (MonoCompile *cfg)
                        case ArgInIReg:
                        case ArgInFloatSSEReg:
                        case ArgInDoubleSSEReg:
+                       case ArgGSharedVtInReg:
                                if (inreg) {
                                        ins->opcode = OP_REGVAR;
                                        ins->dreg = ainfo->reg;
                                }
                                break;
                        case ArgOnStack:
+                       case ArgGSharedVtOnStack:
                                g_assert (!cfg->arch.omit_fp);
                                ins->opcode = OP_REGOFFSET;
                                ins->inst_basereg = cfg->frame_reg;
@@ -1947,7 +1944,7 @@ mono_arch_create_vars (MonoCompile *cfg)
                cfg->ret_var_is_local = TRUE;
 
        sig_ret = mini_get_underlying_type (sig->ret);
-       if ((cinfo->ret.storage != ArgValuetypeInReg) && MONO_TYPE_ISSTRUCT (sig_ret)) {
+       if (cinfo->ret.storage == ArgValuetypeAddrInIReg) {
                cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
                if (G_UNLIKELY (cfg->verbose_level > 1)) {
                        printf ("vret_addr = ");
@@ -1962,15 +1959,14 @@ mono_arch_create_vars (MonoCompile *cfg)
                        MonoInst *ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
                        ins->flags |= MONO_INST_VOLATILE;
                        cfg->arch.seq_point_info_var = ins;
-
-                       ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
-                       ins->flags |= MONO_INST_VOLATILE;
-                       cfg->arch.ss_tramp_var = ins;
                }
+               ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+               ins->flags |= MONO_INST_VOLATILE;
+               cfg->arch.ss_tramp_var = ins;
 
-           ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+               ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
                ins->flags |= MONO_INST_VOLATILE;
-               cfg->arch.ss_trigger_page_var = ins;
+               cfg->arch.bp_tramp_var = ins;
        }
 
        if (cfg->method->save_lmf)
@@ -2078,6 +2074,9 @@ arg_storage_to_llvm_arg_storage (MonoCompile *cfg, ArgStorage storage)
                return LLVMArgInIReg;
        case ArgNone:
                return LLVMArgNone;
+       case ArgGSharedVtInReg:
+       case ArgGSharedVtOnStack:
+               return LLVMArgGSharedVt;
        default:
                g_assert_not_reached ();
                return LLVMArgNone;
@@ -2119,7 +2118,7 @@ mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
                        linfo->ret.pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, cinfo->ret.pair_storage [j]);
        }
 
-       if (MONO_TYPE_ISSTRUCT (sig_ret) && cinfo->ret.storage == ArgInIReg) {
+       if (cinfo->ret.storage == ArgValuetypeAddrInIReg) {
                /* Vtype returned using a hidden argument */
                linfo->ret.storage = LLVMArgVtypeRetAddr;
                linfo->vret_arg_index = cinfo->vret_arg_index;
@@ -2167,6 +2166,10 @@ mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
                        for (j = 0; j < 2; ++j)
                                linfo->args [i].pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);
                        break;
+               case ArgGSharedVtInReg:
+               case ArgGSharedVtOnStack:
+                       linfo->args [i].storage = LLVMArgGSharedVt;
+                       break;
                default:
                        cfg->exception_message = g_strdup ("ainfo->storage");
                        cfg->disable_llvm = TRUE;
@@ -2273,53 +2276,59 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
                case ArgOnStack:
                case ArgValuetypeInReg:
                case ArgValuetypeAddrInIReg:
+               case ArgGSharedVtInReg:
+               case ArgGSharedVtOnStack: {
+                       if (ainfo->storage == ArgOnStack && !MONO_TYPE_ISSTRUCT (t) && !call->tail_call)
+                               /* Already emitted above */
+                               break;
                        if (ainfo->storage == ArgOnStack && call->tail_call) {
                                MonoInst *call_inst = (MonoInst*)call;
                                cfg->args [i]->flags |= MONO_INST_VOLATILE;
                                EMIT_NEW_ARGSTORE (cfg, call_inst, i, in);
-                       } else if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
-                               guint32 align;
-                               guint32 size;
+                               break;
+                       }
 
-                               if (t->type == MONO_TYPE_TYPEDBYREF) {
-                                       size = sizeof (MonoTypedRef);
-                                       align = sizeof (gpointer);
-                               }
+                       guint32 align;
+                       guint32 size;
+
+                       if (t->type == MONO_TYPE_TYPEDBYREF) {
+                               size = sizeof (MonoTypedRef);
+                               align = sizeof (gpointer);
+                       }
+                       else {
+                               if (sig->pinvoke)
+                                       size = mono_type_native_stack_size (t, &align);
                                else {
-                                       if (sig->pinvoke)
-                                               size = mono_type_native_stack_size (t, &align);
-                                       else {
-                                               /* 
-                                                * Other backends use mono_type_stack_size (), but that
-                                                * aligns the size to 8, which is larger than the size of
-                                                * the source, leading to reads of invalid memory if the
-                                                * source is at the end of address space.
-                                                */
-                                               size = mono_class_value_size (mono_class_from_mono_type (t), &align);
-                                       }
+                                       /*
+                                        * Other backends use mono_type_stack_size (), but that
+                                        * aligns the size to 8, which is larger than the size of
+                                        * the source, leading to reads of invalid memory if the
+                                        * source is at the end of address space.
+                                        */
+                                       size = mono_class_value_size (mono_class_from_mono_type (t), &align);
                                }
-                               g_assert (in->klass);
+                       }
 
-                               if (ainfo->storage == ArgOnStack && size >= 10000) {
-                                       /* Avoid asserts in emit_memcpy () */
-                                       cfg->exception_type = MONO_EXCEPTION_INVALID_PROGRAM;
-                                       cfg->exception_message = g_strdup_printf ("Passing an argument of size '%d'.", size);
-                                       /* Continue normally */
-                               }
+                       if (size >= 10000) {
+                               /* Avoid asserts in emit_memcpy () */
+                               cfg->exception_type = MONO_EXCEPTION_INVALID_PROGRAM;
+                               cfg->exception_message = g_strdup_printf ("Passing an argument of size '%d'.", size);
+                               /* Continue normally */
+                       }
 
-                               if (size > 0) {
-                                       MONO_INST_NEW (cfg, arg, OP_OUTARG_VT);
-                                       arg->sreg1 = in->dreg;
-                                       arg->klass = mono_class_from_mono_type (t);
-                                       arg->backend.size = size;
-                                       arg->inst_p0 = call;
-                                       arg->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
-                                       memcpy (arg->inst_p1, ainfo, sizeof (ArgInfo));
+                       if (size > 0) {
+                               MONO_INST_NEW (cfg, arg, OP_OUTARG_VT);
+                               arg->sreg1 = in->dreg;
+                               arg->klass = mono_class_from_mono_type (t);
+                               arg->backend.size = size;
+                               arg->inst_p0 = call;
+                               arg->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
+                               memcpy (arg->inst_p1, ainfo, sizeof (ArgInfo));
 
-                                       MONO_ADD_INS (cfg->cbb, arg);
-                               }
+                               MONO_ADD_INS (cfg->cbb, arg);
                        }
                        break;
+               }
                default:
                        g_assert_not_reached ();
                }
@@ -2333,49 +2342,50 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sig->sentinelpos))
                emit_sig_cookie (cfg, call, cinfo);
 
-       sig_ret = mini_get_underlying_type (sig->ret);
-       if (sig_ret && MONO_TYPE_ISSTRUCT (sig_ret)) {
-               MonoInst *vtarg;
-
-               if (cinfo->ret.storage == ArgValuetypeInReg) {
-                       if (cinfo->ret.pair_storage [0] == ArgInIReg && cinfo->ret.pair_storage [1] == ArgNone) {
-                               /*
-                                * Tell the JIT to use a more efficient calling convention: call using
-                                * OP_CALL, compute the result location after the call, and save the 
-                                * result there.
-                                */
-                               call->vret_in_reg = TRUE;
-                               /* 
-                                * Nullify the instruction computing the vret addr to enable 
-                                * future optimizations.
-                                */
-                               if (call->vret_var)
-                                       NULLIFY_INS (call->vret_var);
-                       } else {
-                               if (call->tail_call)
-                                       NOT_IMPLEMENTED;
-                               /*
-                                * The valuetype is in RAX:RDX after the call, need to be copied to
-                                * the stack. Push the address here, so the call instruction can
-                                * access it.
-                                */
-                               if (!cfg->arch.vret_addr_loc) {
-                                       cfg->arch.vret_addr_loc = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
-                                       /* Prevent it from being register allocated or optimized away */
-                                       ((MonoInst*)cfg->arch.vret_addr_loc)->flags |= MONO_INST_VOLATILE;
-                               }
-
-                               MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ((MonoInst*)cfg->arch.vret_addr_loc)->dreg, call->vret_var->dreg);
+       switch (cinfo->ret.storage) {
+       case ArgValuetypeInReg:
+               if (cinfo->ret.pair_storage [0] == ArgInIReg && cinfo->ret.pair_storage [1] == ArgNone) {
+                       /*
+                        * Tell the JIT to use a more efficient calling convention: call using
+                        * OP_CALL, compute the result location after the call, and save the
+                        * result there.
+                        */
+                       call->vret_in_reg = TRUE;
+                       /*
+                        * Nullify the instruction computing the vret addr to enable
+                        * future optimizations.
+                        */
+                       if (call->vret_var)
+                               NULLIFY_INS (call->vret_var);
+               } else {
+                       if (call->tail_call)
+                               NOT_IMPLEMENTED;
+                       /*
+                        * The valuetype is in RAX:RDX after the call, need to be copied to
+                        * the stack. Push the address here, so the call instruction can
+                        * access it.
+                        */
+                       if (!cfg->arch.vret_addr_loc) {
+                               cfg->arch.vret_addr_loc = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
+                               /* Prevent it from being register allocated or optimized away */
+                               ((MonoInst*)cfg->arch.vret_addr_loc)->flags |= MONO_INST_VOLATILE;
                        }
-               }
-               else {
-                       MONO_INST_NEW (cfg, vtarg, OP_MOVE);
-                       vtarg->sreg1 = call->vret_var->dreg;
-                       vtarg->dreg = mono_alloc_preg (cfg);
-                       MONO_ADD_INS (cfg->cbb, vtarg);
 
-                       mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
+                       MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ((MonoInst*)cfg->arch.vret_addr_loc)->dreg, call->vret_var->dreg);
                }
+               break;
+       case ArgValuetypeAddrInIReg: {
+               MonoInst *vtarg;
+               MONO_INST_NEW (cfg, vtarg, OP_MOVE);
+               vtarg->sreg1 = call->vret_var->dreg;
+               vtarg->dreg = mono_alloc_preg (cfg);
+               MONO_ADD_INS (cfg->cbb, vtarg);
+
+               mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
+               break;
+       }
+       default:
+               break;
        }
 
        if (cfg->method->save_lmf) {
@@ -2394,7 +2404,8 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
        ArgInfo *ainfo = (ArgInfo*)ins->inst_p1;
        int size = ins->backend.size;
 
-       if (ainfo->storage == ArgValuetypeInReg) {
+       switch (ainfo->storage) {
+       case ArgValuetypeInReg: {
                MonoInst *load;
                int part;
 
@@ -2421,7 +2432,9 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
 
                        add_outarg_reg (cfg, call, ainfo->pair_storage [part], ainfo->pair_regs [part], load);
                }
-       } else if (ainfo->storage == ArgValuetypeAddrInIReg) {
+               break;
+       }
+       case ArgValuetypeAddrInIReg: {
                MonoInst *vtaddr, *load;
                vtaddr = mono_compile_create_var (cfg, &ins->klass->byval_arg, OP_LOCAL);
                
@@ -2445,7 +2458,16 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
                } else {
                        MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, load->dreg);
                }
-       } else {
+               break;
+       }
+       case ArgGSharedVtInReg:
+               /* Pass by addr */
+               mono_call_inst_add_outarg_reg (cfg, call, src->dreg, ainfo->reg, FALSE);
+               break;
+       case ArgGSharedVtOnStack:
+               g_assert_not_reached ();
+               break;
+       default:
                if (size == 8) {
                        int dreg = mono_alloc_ireg (cfg);
 
@@ -2637,7 +2659,7 @@ mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, g
                        pindex = 1;
        }
 
-       if (dinfo->cinfo->vtype_retaddr)
+       if (dinfo->cinfo->ret.storage == ArgValuetypeAddrInIReg)
                p->regs [greg ++] = PTR_TO_GREG(ret);
 
        for (i = pindex; i < sig->param_count; i++) {
@@ -2783,7 +2805,7 @@ mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf)
                        /* Fall through */
                }
        case MONO_TYPE_VALUETYPE:
-               if (dinfo->cinfo->vtype_retaddr) {
+               if (dinfo->cinfo->ret.storage == ArgValuetypeAddrInIReg) {
                        /* Nothing to do */
                } else {
                        ArgInfo *ainfo = &dinfo->cinfo->ret;
@@ -4130,35 +4152,21 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
                        break;
                case OP_SEQ_POINT: {
-                       int i;
-
                        if (ins->flags & MONO_INST_SINGLE_STEP_LOC) {
-                               if (cfg->compile_aot) {
-                                       MonoInst *var = cfg->arch.ss_tramp_var;
-                                       guint8 *label;
-
-                                       /* Load ss_tramp_var */
-                                       amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
-                                       /* Load the trampoline address */
-                                       amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, 8);
-                                       /* Call it if it is non-null */
-                                       amd64_test_reg_reg (code, AMD64_R11, AMD64_R11);
-                                       label = code;
-                                       amd64_branch8 (code, X86_CC_Z, 0, FALSE);
-                                       amd64_call_reg (code, AMD64_R11);
-                                       amd64_patch (label, code);
-                               } else {
-                                       /* 
-                                        * Read from the single stepping trigger page. This will cause a
-                                        * SIGSEGV when single stepping is enabled.
-                                        * We do this _before_ the breakpoint, so single stepping after
-                                        * a breakpoint is hit will step to the next IL offset.
-                                        */
-                                       MonoInst *var = cfg->arch.ss_trigger_page_var;
+                               MonoInst *var = cfg->arch.ss_tramp_var;
+                               guint8 *label;
 
-                                       amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
-                                       amd64_alu_membase_imm_size (code, X86_CMP, AMD64_R11, 0, 0, 4);
-                               }
+                               /* Load ss_tramp_var */
+                               /* This is equal to &ss_trampoline */
+                               amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
+                               /* Load the trampoline address */
+                               amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, 8);
+                               /* Call it if it is non-null */
+                               amd64_test_reg_reg (code, AMD64_R11, AMD64_R11);
+                               label = code;
+                               amd64_branch8 (code, X86_CC_Z, 0, FALSE);
+                               amd64_call_reg (code, AMD64_R11);
+                               amd64_patch (label, code);
                        }
 
                        /* 
@@ -4184,12 +4192,26 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                amd64_call_reg (code, AMD64_R11);
                                amd64_patch (label, code);
                        } else {
-                               /* 
-                                * A placeholder for a possible breakpoint inserted by
-                                * mono_arch_set_breakpoint ().
+                               MonoInst *var = cfg->arch.bp_tramp_var;
+                               guint8 *label;
+
+                               /*
+                                * Emit a test+branch against a constant; mono_arch_set_breakpoint () overwrites
+                                * the constant with a non-zero value so the branch falls through to the call.
                                 */
-                               for (i = 0; i < breakpoint_size; ++i)
-                                       x86_nop (code);
+                               amd64_mov_reg_imm (code, AMD64_R11, 0);
+                               amd64_test_reg_reg (code, AMD64_R11, AMD64_R11);
+                               label = code;
+                               amd64_branch8 (code, X86_CC_Z, 0, FALSE);
+
+                               g_assert (var);
+                               g_assert (var->opcode == OP_REGOFFSET);
+                               /* Load bp_tramp_var */
+                               /* This is equal to &bp_trampoline */
+                               amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
+                               /* Call the trampoline */
+                               amd64_call_membase (code, AMD64_R11, 0);
+                               amd64_patch (label, code);
                        }
                        /*
                         * Add an additional nop so skipping the bp doesn't cause the ip to point
@@ -7157,12 +7179,18 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, MONO_STRUCT_OFFSET (SeqPointInfo, ss_tramp_addr), 8);
                        amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, AMD64_R11, 8);
                } else {
-                       /* Initialize ss_trigger_page_var */
-                       ins = cfg->arch.ss_trigger_page_var;
+                       /* Initialize ss_tramp_var */
+                       ins = cfg->arch.ss_tramp_var;
+                       g_assert (ins->opcode == OP_REGOFFSET);
+
+                       amd64_mov_reg_imm (code, AMD64_R11, (guint64)&ss_trampoline);
+                       amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, AMD64_R11, 8);
 
+                       /* Initialize bp_tramp_var */
+                       ins = cfg->arch.bp_tramp_var;
                        g_assert (ins->opcode == OP_REGOFFSET);
 
-                       amd64_mov_reg_imm (code, AMD64_R11, (guint64)ss_trigger_page);
+                       amd64_mov_reg_imm (code, AMD64_R11, (guint64)&bp_trampoline);
                        amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, AMD64_R11, 8);
                }
        }
@@ -8415,7 +8443,6 @@ void
 mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
 {
        guint8 *code = ip;
-       guint8 *orig_code = code;
 
        if (ji->from_aot) {
                guint32 native_offset = ip - (guint8*)ji->code_start;
@@ -8424,20 +8451,10 @@ mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
                g_assert (info->bp_addrs [native_offset] == 0);
                info->bp_addrs [native_offset] = mini_get_breakpoint_trampoline ();
        } else {
-               /* 
-                * In production, we will use int3 (has to fix the size in the md 
-                * file). But that could confuse gdb, so during development, we emit a SIGSEGV
-                * instead.
-                */
-               g_assert (code [0] == 0x90);
-               if (breakpoint_size == 8) {
-                       amd64_mov_reg_mem (code, AMD64_R11, (guint64)bp_trigger_page, 4);
-               } else {
-                       amd64_mov_reg_imm_size (code, AMD64_R11, (guint64)bp_trigger_page, 8);
-                       amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, 4);
-               }
-
-               g_assert (code - orig_code == breakpoint_size);
+               /* ip points to a mov r11, 0 */
+               g_assert (code [0] == 0x41);
+               g_assert (code [1] == 0xbb);
+               amd64_mov_reg_imm (code, AMD64_R11, 1);
        }
 }
 
@@ -8450,7 +8467,6 @@ void
 mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
 {
        guint8 *code = ip;
-       int i;
 
        if (ji->from_aot) {
                guint32 native_offset = ip - (guint8*)ji->code_start;
@@ -8458,28 +8474,15 @@ mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
 
                info->bp_addrs [native_offset] = NULL;
        } else {
-               for (i = 0; i < breakpoint_size; ++i)
-                       x86_nop (code);
+               amd64_mov_reg_imm (code, AMD64_R11, 0);
        }
 }
 
 gboolean
 mono_arch_is_breakpoint_event (void *info, void *sigctx)
 {
-#ifdef HOST_WIN32
-       EXCEPTION_RECORD* einfo = ((EXCEPTION_POINTERS*)info)->ExceptionRecord;
-       if (einfo->ExceptionCode == EXCEPTION_ACCESS_VIOLATION && (gpointer)einfo->ExceptionInformation [1] == bp_trigger_page)
-               return TRUE;
-       else
-               return FALSE;
-#else
-       siginfo_t* sinfo = (siginfo_t*) info;
-       /* Sometimes the address is off by 4 */
-       if (sinfo->si_addr >= bp_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)bp_trigger_page + 128)
-               return TRUE;
-       else
-               return FALSE;
-#endif
+       /* We use soft breakpoints on amd64 */
+       return FALSE;
 }
 
 /*
@@ -8491,11 +8494,7 @@ mono_arch_is_breakpoint_event (void *info, void *sigctx)
 void
 mono_arch_skip_breakpoint (MonoContext *ctx, MonoJitInfo *ji)
 {
-       if (ji->from_aot) {
-               /* The breakpoint instruction is a call */
-       } else {
-               MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + breakpoint_fault_size);
-       }
+       g_assert_not_reached ();
 }
        
 /*
@@ -8506,7 +8505,6 @@ mono_arch_skip_breakpoint (MonoContext *ctx, MonoJitInfo *ji)
 void
 mono_arch_start_single_stepping (void)
 {
-       mono_mprotect (ss_trigger_page, mono_pagesize (), 0);
        ss_trampoline = mini_get_single_step_trampoline ();
 }
        
@@ -8518,7 +8516,6 @@ mono_arch_start_single_stepping (void)
 void
 mono_arch_stop_single_stepping (void)
 {
-       mono_mprotect (ss_trigger_page, mono_pagesize (), MONO_MMAP_READ);
        ss_trampoline = NULL;
 }
 
@@ -8531,20 +8528,8 @@ mono_arch_stop_single_stepping (void)
 gboolean
 mono_arch_is_single_step_event (void *info, void *sigctx)
 {
-#ifdef HOST_WIN32
-       EXCEPTION_RECORD* einfo = ((EXCEPTION_POINTERS*)info)->ExceptionRecord;
-       if (einfo->ExceptionCode == EXCEPTION_ACCESS_VIOLATION && (gpointer)einfo->ExceptionInformation [1] == ss_trigger_page)
-               return TRUE;
-       else
-               return FALSE;
-#else
-       siginfo_t* sinfo = (siginfo_t*) info;
-       /* Sometimes the address is off by 4 */
-       if (sinfo->si_addr >= ss_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)ss_trigger_page + 128)
-               return TRUE;
-       else
-               return FALSE;
-#endif
+       /* We use soft breakpoints on amd64 */
+       return FALSE;
 }
 
 /*
@@ -8556,7 +8541,7 @@ mono_arch_is_single_step_event (void *info, void *sigctx)
 void
 mono_arch_skip_single_step (MonoContext *ctx)
 {
-       MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + single_step_fault_size);
+       g_assert_not_reached ();
 }
 
 /*
@@ -8642,3 +8627,9 @@ mono_arch_opcode_supported (int opcode)
                return FALSE;
        }
 }
+
+#if defined(ENABLE_GSHAREDVT)
+
+#include "../../../mono-extensions/mono/mini/mini-amd64-gsharedvt.c"
+
+#endif /* ENABLE_GSHAREDVT */