From: Vlad Brezae Date: Tue, 3 Oct 2017 15:51:20 +0000 (+0300) Subject: Merge pull request #5636 from BrzVlad/fix-xmm-scan X-Git-Url: http://wien.tomnetworks.com/gitweb/?p=mono.git;a=commitdiff_plain;h=66205238fc51274272ba9db9ce07e9b2fb60d1a6;hp=1004d95b6b70e8b67a2b6782e0832faab9fa269a Merge pull request #5636 from BrzVlad/fix-xmm-scan [sgen] Scan simd registers on linux amd64 + apple arm64 --- diff --git a/mono/mini/exceptions-arm64.c b/mono/mini/exceptions-arm64.c index fb8d1736c10..9e19a8d25e7 100644 --- a/mono/mini/exceptions-arm64.c +++ b/mono/mini/exceptions-arm64.c @@ -43,7 +43,7 @@ mono_arch_get_restore_context (MonoTrampInfo **info, gboolean aot) labels [0] = code; arm_cbzx (code, ARMREG_IP1, 0); for (i = 0; i < 32; ++i) - arm_ldrfpx (code, i, ctx_reg, MONO_STRUCT_OFFSET (MonoContext, fregs) + (i * 8)); + arm_ldrfpx (code, i, ctx_reg, MONO_STRUCT_OFFSET (MonoContext, fregs) + (i * sizeof (MonoContextSimdReg))); mono_arm_patch (labels [0], code, MONO_R_ARM64_CBZ); /* Restore gregs */ // FIXME: Restore less registers @@ -124,7 +124,7 @@ mono_arch_get_call_filter (MonoTrampInfo **info, gboolean aot) labels [0] = code; arm_cbzx (code, ARMREG_IP0, 0); for (i = 0; i < num_fregs; ++i) - arm_ldrfpx (code, ARMREG_D8 + i, ARMREG_R0, MONO_STRUCT_OFFSET (MonoContext, fregs) + ((i + 8) * 8)); + arm_ldrfpx (code, ARMREG_D8 + i, ARMREG_R0, MONO_STRUCT_OFFSET (MonoContext, fregs) + ((i + 8) * sizeof (MonoContextSimdReg))); mono_arm_patch (labels [0], code, MONO_R_ARM64_CBZ); /* Load fp */ arm_ldrx (code, ARMREG_FP, ARMREG_R0, MONO_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_FP * 8)); @@ -393,7 +393,8 @@ mono_arm_throw_exception (gpointer arg, mgreg_t pc, mgreg_t *int_regs, gdouble * /* Initialize a ctx based on the arguments */ memset (&ctx, 0, sizeof (MonoContext)); memcpy (&(ctx.regs [0]), int_regs, sizeof (mgreg_t) * 32); - memcpy (&(ctx.fregs [ARMREG_D8]), fp_regs, sizeof (double) * 8); + for (int i = 0; i < 8; i++) + *((gdouble*)&ctx.fregs [ARMREG_D8 + i]) = fp_regs [i]; ctx.has_fregs = 1; ctx.pc = pc; @@ -422,7 +423,8 @@ mono_arm_resume_unwind (gpointer arg, mgreg_t pc, mgreg_t *int_regs, gdouble *fp /* Initialize a ctx based on the arguments */ memset (&ctx, 0, sizeof (MonoContext)); memcpy (&(ctx.regs [0]), int_regs, sizeof (mgreg_t) * 32); - memcpy (&(ctx.fregs [ARMREG_D8]), fp_regs, sizeof (double) * 8); + for (int i = 0; i < 8; i++) + *((gdouble*)&ctx.fregs [ARMREG_D8 + i]) = fp_regs [i]; ctx.has_fregs = 1; ctx.pc = pc; @@ -460,7 +462,8 @@ mono_arch_unwind_frame (MonoDomain *domain, MonoJitTlsData *jit_tls, memcpy (regs, &new_ctx->regs, sizeof (mgreg_t) * 32); /* v8..v15 are callee saved */ - memcpy (regs + MONO_MAX_IREGS, &(new_ctx->fregs [8]), sizeof (mgreg_t) * 8); + for (int i = 0; i < 8; i++) + (regs + MONO_MAX_IREGS) [i] = *((mgreg_t*)&new_ctx->fregs [8 + i]); mono_unwind_frame (unwind_info, unwind_info_len, ji->code_start, (guint8*)ji->code_start + ji->code_size, @@ -468,7 +471,8 @@ mono_arch_unwind_frame (MonoDomain *domain, MonoJitTlsData *jit_tls, save_locations, MONO_MAX_IREGS, &cfa); memcpy (&new_ctx->regs, regs, sizeof (mgreg_t) * 32); - memcpy (&(new_ctx->fregs [8]), regs + MONO_MAX_IREGS, sizeof (mgreg_t) * 8); + for (int i = 0; i < 8; i++) + *((mgreg_t*)&new_ctx->fregs [8 + i]) = (regs + MONO_MAX_IREGS) [i]; new_ctx->pc = regs [ARMREG_LR]; new_ctx->regs [ARMREG_SP] = (mgreg_t)cfa; diff --git a/mono/mini/mini-darwin.c b/mono/mini/mini-darwin.c index 73be22c7c6e..aca60d498f4 100644 --- a/mono/mini/mini-darwin.c +++ b/mono/mini/mini-darwin.c @@ -100,8 +100,6 @@ mono_thread_state_init_from_handle (MonoThreadUnwindState *tctx, MonoThreadInfo kern_return_t ret; mach_msg_type_number_t num_state, num_fpstate; thread_state_t state, fpstate; - ucontext_t ctx; - mcontext_t mctx; MonoJitTlsData *jit_tls; void *domain; MonoLMF *lmf = NULL; @@ -116,7 +114,6 @@ mono_thread_state_init_from_handle (MonoThreadUnwindState *tctx, MonoThreadInfo state = (thread_state_t) alloca (mono_mach_arch_get_thread_state_size ()); fpstate = (thread_state_t) alloca (mono_mach_arch_get_thread_fpstate_size ()); - mctx = (mcontext_t) alloca (mono_mach_arch_get_mcontext_size ()); do { ret = mono_mach_arch_get_thread_states (info->native_handle, state, &num_state, fpstate, &num_fpstate); @@ -124,10 +121,7 @@ mono_thread_state_init_from_handle (MonoThreadUnwindState *tctx, MonoThreadInfo if (ret != KERN_SUCCESS) return FALSE; - mono_mach_arch_thread_states_to_mcontext (state, fpstate, mctx); - ctx.uc_mcontext = mctx; - - mono_sigctx_to_monoctx (&ctx, &tctx->ctx); + mono_mach_arch_thread_states_to_mono_context (state, fpstate, &tctx->ctx); /* mono_set_jit_tls () sets this */ jit_tls = mono_thread_info_tls_get (info, TLS_KEY_JIT_TLS); diff --git a/mono/utils/mach-support-amd64.c b/mono/utils/mach-support-amd64.c index 733f94c343c..ceef5f25e45 100644 --- a/mono/utils/mach-support-amd64.c +++ b/mono/utils/mach-support-amd64.c @@ -89,6 +89,7 @@ mono_mach_arch_thread_states_to_mono_context (thread_state_t state, thread_state context->gregs [AMD64_RCX] = arch_state->__rcx; context->gregs [AMD64_RDX] = arch_state->__rdx; context->gregs [AMD64_RDI] = arch_state->__rdi; + context->gregs [AMD64_RSI] = arch_state->__rsi; context->gregs [AMD64_RBP] = arch_state->__rbp; context->gregs [AMD64_RSP] = arch_state->__rsp; context->gregs [AMD64_R8] = arch_state->__r8; diff --git a/mono/utils/mach-support-arm64.c b/mono/utils/mach-support-arm64.c index 5033eb19d66..33c07697411 100644 --- a/mono/utils/mach-support-arm64.c +++ b/mono/utils/mach-support-arm64.c @@ -70,18 +70,22 @@ void mono_mach_arch_thread_states_to_mcontext (thread_state_t state, thread_state_t fpstate, void *context) { arm_unified_thread_state_t *arch_state = (arm_unified_thread_state_t *) state; + arm_neon_state64_t *arch_fpstate = (arm_neon_state64_t*) fpstate; struct __darwin_mcontext64 *ctx = (struct __darwin_mcontext64 *) context; ctx->__ss = arch_state->ts_64; + ctx->__ns = *arch_fpstate; } void mono_mach_arch_mcontext_to_thread_states (void *context, thread_state_t state, thread_state_t fpstate) { arm_unified_thread_state_t *arch_state = (arm_unified_thread_state_t *) state; + arm_neon_state64_t *arch_fpstate = (arm_neon_state64_t*) fpstate; struct __darwin_mcontext64 *ctx = (struct __darwin_mcontext64 *) context; arch_state->ts_64 = ctx->__ss; + *arch_fpstate = ctx->__ns; } void @@ -89,6 +93,8 @@ mono_mach_arch_thread_states_to_mono_context (thread_state_t state, thread_state { int i; arm_unified_thread_state_t *arch_state = (arm_unified_thread_state_t *) state; + arm_neon_state64_t *arch_fpstate = (arm_neon_state64_t*) fpstate; + for (i = 0; i < 29; ++i) context->regs [i] = arch_state->ts_64.__x [i]; @@ -96,6 +102,9 @@ mono_mach_arch_thread_states_to_mono_context (thread_state_t state, thread_state context->regs [ARMREG_R30] = arch_state->ts_64.__lr; context->regs [ARMREG_SP] = arch_state->ts_64.__sp; context->pc = arch_state->ts_64.__pc; + + for (i = 0; i < 32; ++i) + context->fregs [i] = arch_fpstate->__v [i]; } int @@ -114,18 +123,28 @@ kern_return_t mono_mach_arch_get_thread_states (thread_port_t thread, thread_state_t state, mach_msg_type_number_t *count, thread_state_t fpstate, mach_msg_type_number_t *fpcount) { arm_unified_thread_state_t *arch_state = (arm_unified_thread_state_t *) state; + arm_neon_state64_t *arch_fpstate = (arm_neon_state64_t *) fpstate; kern_return_t ret; *count = ARM_UNIFIED_THREAD_STATE_COUNT; - ret = thread_get_state (thread, ARM_UNIFIED_THREAD_STATE, (thread_state_t) arch_state, count); + if (ret != KERN_SUCCESS) + return ret; + + *fpcount = ARM_NEON_STATE64_COUNT; + ret = thread_get_state (thread, ARM_NEON_STATE64, (thread_state_t) arch_fpstate, fpcount); return ret; } kern_return_t mono_mach_arch_set_thread_states (thread_port_t thread, thread_state_t state, mach_msg_type_number_t count, thread_state_t fpstate, mach_msg_type_number_t fpcount) { - return thread_set_state (thread, ARM_UNIFIED_THREAD_STATE, state, count); + kern_return_t ret; + ret = thread_set_state (thread, ARM_UNIFIED_THREAD_STATE, state, count); + if (ret != KERN_SUCCESS) + return ret; + ret = thread_set_state (thread, ARM_NEON_STATE64, fpstate, fpcount); + return ret; } void * diff --git a/mono/utils/mono-context.c b/mono/utils/mono-context.c index 3861e54f5e6..59ef74b215c 100644 --- a/mono/utils/mono-context.c +++ b/mono/utils/mono-context.c @@ -401,8 +401,7 @@ mono_sigctx_to_monoctx (void *sigctx, MonoContext *mctx) g_assert (fpctx->head.magic == FPSIMD_MAGIC); for (i = 0; i < 32; ++i) - /* Only store the bottom 8 bytes for now */ - *(guint64*)&(mctx->fregs [i]) = fpctx->vregs [i]; + mctx->fregs [i] = fpctx->vregs [i]; #endif /* FIXME: apple */ #endif diff --git a/mono/utils/mono-context.h b/mono/utils/mono-context.h index 827e0ed9f81..367449edd8f 100644 --- a/mono/utils/mono-context.h +++ b/mono/utils/mono-context.h @@ -22,9 +22,19 @@ #define MONO_CONTEXT_OFFSET(field, index, field_type) \ "i" (offsetof (MonoContext, field) + (index) * sizeof (field_type)) +#if defined(TARGET_X86) #if defined(__APPLE__) typedef struct __darwin_xmm_reg MonoContextSimdReg; #endif +#elif defined(TARGET_AMD64) +#if defined(__APPLE__) +typedef struct __darwin_xmm_reg MonoContextSimdReg; +#elif defined(__linux__) +typedef struct _libc_xmmreg MonoContextSimdReg; +#endif +#elif defined(TARGET_ARM64) +typedef __uint128_t MonoContextSimdReg; +#endif /* * General notes about mono-context. @@ -237,7 +247,7 @@ typedef struct { typedef struct { mgreg_t gregs [AMD64_NREG]; -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__linux__) MonoContextSimdReg fregs [AMD64_XMM_NREG]; #else double fregs [AMD64_XMM_NREG]; @@ -417,7 +427,8 @@ typedef struct { typedef struct { mgreg_t regs [32]; - double fregs [32]; + /* FIXME not fully saved in trampolines */ + MonoContextSimdReg fregs [32]; mgreg_t pc; /* * fregs might not be initialized if this context was created from a @@ -438,12 +449,17 @@ typedef struct { #define MONO_CONTEXT_GET_CURRENT(ctx) do { \ arm_unified_thread_state_t thread_state; \ + arm_neon_state64_t thread_fpstate; \ int state_flavor = ARM_UNIFIED_THREAD_STATE; \ + int fpstate_flavor = ARM_NEON_STATE64; \ unsigned state_count = ARM_UNIFIED_THREAD_STATE_COUNT; \ + unsigned fpstate_count = ARM_NEON_STATE64_COUNT; \ thread_port_t self = mach_thread_self (); \ kern_return_t ret = thread_get_state (self, state_flavor, (thread_state_t) &thread_state, &state_count); \ g_assert (ret == 0); \ - mono_mach_arch_thread_states_to_mono_context ((thread_state_t)&thread_state, (thread_state_t)NULL, &ctx); \ + ret = thread_get_state (self, fpstate_flavor, (thread_state_t) &thread_fpstate, &fpstate_count); \ + g_assert (ret == 0); \ + mono_mach_arch_thread_states_to_mono_context ((thread_state_t) &thread_state, (thread_state_t) &thread_fpstate, &ctx); \ mach_port_deallocate (current_task (), self); \ } while (0); @@ -470,22 +486,22 @@ typedef struct { "stp x30, xzr, [x16], #8\n" \ "mov x30, sp\n" \ "str x30, [x16], #8\n" \ - "stp d0, d1, [x16], #16\n" \ - "stp d2, d3, [x16], #16\n" \ - "stp d4, d5, [x16], #16\n" \ - "stp d6, d7, [x16], #16\n" \ - "stp d8, d9, [x16], #16\n" \ - "stp d10, d11, [x16], #16\n" \ - "stp d12, d13, [x16], #16\n" \ - "stp d14, d15, [x16], #16\n" \ - "stp d16, d17, [x16], #16\n" \ - "stp d18, d19, [x16], #16\n" \ - "stp d20, d21, [x16], #16\n" \ - "stp d22, d23, [x16], #16\n" \ - "stp d24, d25, [x16], #16\n" \ - "stp d26, d27, [x16], #16\n" \ - "stp d28, d29, [x16], #16\n" \ - "stp d30, d31, [x16], #16\n" \ + "stp q0, q1, [x16], #32\n" \ + "stp q2, q3, [x16], #32\n" \ + "stp q4, q5, [x16], #32\n" \ + "stp q6, q7, [x16], #32\n" \ + "stp q8, q9, [x16], #32\n" \ + "stp q10, q11, [x16], #32\n" \ + "stp q12, q13, [x16], #32\n" \ + "stp q14, q15, [x16], #32\n" \ + "stp q16, q17, [x16], #32\n" \ + "stp q18, q19, [x16], #32\n" \ + "stp q20, q21, [x16], #32\n" \ + "stp q22, q23, [x16], #32\n" \ + "stp q24, q25, [x16], #32\n" \ + "stp q26, q27, [x16], #32\n" \ + "stp q28, q29, [x16], #32\n" \ + "stp q30, q31, [x16], #32\n" \ : \ : "r" (&ctx.regs) \ : "x16", "x30", "memory" \ diff --git a/mono/utils/mono-sigcontext.h b/mono/utils/mono-sigcontext.h index 479b8d4bf01..3837e698a16 100644 --- a/mono/utils/mono-sigcontext.h +++ b/mono/utils/mono-sigcontext.h @@ -260,6 +260,7 @@ typedef struct ucontext { #define UCONTEXT_REG_R15(ctx) (((ucontext_t*)(ctx))->sc_r15) #elif !defined(HOST_WIN32) #define UCONTEXT_GREGS(ctx) ((guint64*)&(((ucontext_t*)(ctx))->uc_mcontext.gregs)) + #define UCONTEXT_FREGS(ctx) (((ucontext_t*)(ctx))->uc_mcontext.fpregs->_xmm) #endif #ifdef UCONTEXT_GREGS @@ -282,6 +283,26 @@ typedef struct ucontext { #define UCONTEXT_REG_R15(ctx) (UCONTEXT_GREGS ((ctx)) [REG_R15]) #endif +#ifdef UCONTEXT_FREGS +#define UCONTEXT_REG_XMM +#define UCONTEXT_REG_XMM0(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM0]) +#define UCONTEXT_REG_XMM1(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM1]) +#define UCONTEXT_REG_XMM2(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM2]) +#define UCONTEXT_REG_XMM3(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM3]) +#define UCONTEXT_REG_XMM4(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM4]) +#define UCONTEXT_REG_XMM5(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM5]) +#define UCONTEXT_REG_XMM6(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM6]) +#define UCONTEXT_REG_XMM7(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM7]) +#define UCONTEXT_REG_XMM8(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM8]) +#define UCONTEXT_REG_XMM9(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM9]) +#define UCONTEXT_REG_XMM10(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM10]) +#define UCONTEXT_REG_XMM11(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM11]) +#define UCONTEXT_REG_XMM12(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM12]) +#define UCONTEXT_REG_XMM13(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM13]) +#define UCONTEXT_REG_XMM14(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM14]) +#define UCONTEXT_REG_XMM15(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM15]) +#endif + #elif defined(__mono_ppc__) #if HAVE_UCONTEXT_H