labels [0] = code;
arm_cbzx (code, ARMREG_IP1, 0);
for (i = 0; i < 32; ++i)
- arm_ldrfpx (code, i, ctx_reg, MONO_STRUCT_OFFSET (MonoContext, fregs) + (i * 8));
+ arm_ldrfpx (code, i, ctx_reg, MONO_STRUCT_OFFSET (MonoContext, fregs) + (i * sizeof (MonoContextSimdReg)));
mono_arm_patch (labels [0], code, MONO_R_ARM64_CBZ);
/* Restore gregs */
// FIXME: Restore less registers
labels [0] = code;
arm_cbzx (code, ARMREG_IP0, 0);
for (i = 0; i < num_fregs; ++i)
- arm_ldrfpx (code, ARMREG_D8 + i, ARMREG_R0, MONO_STRUCT_OFFSET (MonoContext, fregs) + ((i + 8) * 8));
+ arm_ldrfpx (code, ARMREG_D8 + i, ARMREG_R0, MONO_STRUCT_OFFSET (MonoContext, fregs) + ((i + 8) * sizeof (MonoContextSimdReg)));
mono_arm_patch (labels [0], code, MONO_R_ARM64_CBZ);
/* Load fp */
arm_ldrx (code, ARMREG_FP, ARMREG_R0, MONO_STRUCT_OFFSET (MonoContext, regs) + (ARMREG_FP * 8));
/* Initialize a ctx based on the arguments */
memset (&ctx, 0, sizeof (MonoContext));
memcpy (&(ctx.regs [0]), int_regs, sizeof (mgreg_t) * 32);
- memcpy (&(ctx.fregs [ARMREG_D8]), fp_regs, sizeof (double) * 8);
+ for (int i = 0; i < 8; i++)
+ *((gdouble*)&ctx.fregs [ARMREG_D8 + i]) = fp_regs [i];
ctx.has_fregs = 1;
ctx.pc = pc;
/* Initialize a ctx based on the arguments */
memset (&ctx, 0, sizeof (MonoContext));
memcpy (&(ctx.regs [0]), int_regs, sizeof (mgreg_t) * 32);
- memcpy (&(ctx.fregs [ARMREG_D8]), fp_regs, sizeof (double) * 8);
+ for (int i = 0; i < 8; i++)
+ *((gdouble*)&ctx.fregs [ARMREG_D8 + i]) = fp_regs [i];
ctx.has_fregs = 1;
ctx.pc = pc;
memcpy (regs, &new_ctx->regs, sizeof (mgreg_t) * 32);
/* v8..v15 are callee saved */
- memcpy (regs + MONO_MAX_IREGS, &(new_ctx->fregs [8]), sizeof (mgreg_t) * 8);
+ for (int i = 0; i < 8; i++)
+ (regs + MONO_MAX_IREGS) [i] = *((mgreg_t*)&new_ctx->fregs [8 + i]);
mono_unwind_frame (unwind_info, unwind_info_len, ji->code_start,
(guint8*)ji->code_start + ji->code_size,
save_locations, MONO_MAX_IREGS, &cfa);
memcpy (&new_ctx->regs, regs, sizeof (mgreg_t) * 32);
- memcpy (&(new_ctx->fregs [8]), regs + MONO_MAX_IREGS, sizeof (mgreg_t) * 8);
+ for (int i = 0; i < 8; i++)
+ *((mgreg_t*)&new_ctx->fregs [8 + i]) = (regs + MONO_MAX_IREGS) [i];
new_ctx->pc = regs [ARMREG_LR];
new_ctx->regs [ARMREG_SP] = (mgreg_t)cfa;
kern_return_t ret;
mach_msg_type_number_t num_state, num_fpstate;
thread_state_t state, fpstate;
- ucontext_t ctx;
- mcontext_t mctx;
MonoJitTlsData *jit_tls;
void *domain;
MonoLMF *lmf = NULL;
state = (thread_state_t) alloca (mono_mach_arch_get_thread_state_size ());
fpstate = (thread_state_t) alloca (mono_mach_arch_get_thread_fpstate_size ());
- mctx = (mcontext_t) alloca (mono_mach_arch_get_mcontext_size ());
do {
ret = mono_mach_arch_get_thread_states (info->native_handle, state, &num_state, fpstate, &num_fpstate);
if (ret != KERN_SUCCESS)
return FALSE;
- mono_mach_arch_thread_states_to_mcontext (state, fpstate, mctx);
- ctx.uc_mcontext = mctx;
-
- mono_sigctx_to_monoctx (&ctx, &tctx->ctx);
+ mono_mach_arch_thread_states_to_mono_context (state, fpstate, &tctx->ctx);
/* mono_set_jit_tls () sets this */
jit_tls = mono_thread_info_tls_get (info, TLS_KEY_JIT_TLS);
context->gregs [AMD64_RCX] = arch_state->__rcx;
context->gregs [AMD64_RDX] = arch_state->__rdx;
context->gregs [AMD64_RDI] = arch_state->__rdi;
+ context->gregs [AMD64_RSI] = arch_state->__rsi;
context->gregs [AMD64_RBP] = arch_state->__rbp;
context->gregs [AMD64_RSP] = arch_state->__rsp;
context->gregs [AMD64_R8] = arch_state->__r8;
mono_mach_arch_thread_states_to_mcontext (thread_state_t state, thread_state_t fpstate, void *context)
{
	/*
	 * Pack the raw Mach thread states into a Darwin mcontext:
	 * the general-purpose state goes into __ss, the NEON/FP state into __ns.
	 * @context must point at a struct __darwin_mcontext64.
	 */
	arm_unified_thread_state_t *arch_state = (arm_unified_thread_state_t *) state;
	arm_neon_state64_t *arch_fpstate = (arm_neon_state64_t *) fpstate;
	struct __darwin_mcontext64 *ctx = (struct __darwin_mcontext64 *) context;

	ctx->__ss = arch_state->ts_64;
	ctx->__ns = *arch_fpstate;
}
void
mono_mach_arch_mcontext_to_thread_states (void *context, thread_state_t state, thread_state_t fpstate)
{
	/*
	 * Inverse of mono_mach_arch_thread_states_to_mcontext: unpack a Darwin
	 * mcontext back into the raw Mach general-purpose (__ss) and NEON/FP
	 * (__ns) thread states.
	 */
	arm_unified_thread_state_t *arch_state = (arm_unified_thread_state_t *) state;
	arm_neon_state64_t *arch_fpstate = (arm_neon_state64_t *) fpstate;
	struct __darwin_mcontext64 *ctx = (struct __darwin_mcontext64 *) context;

	arch_state->ts_64 = ctx->__ss;
	*arch_fpstate = ctx->__ns;
}
void
{
	/*
	 * Fill a MonoContext from the raw Mach thread states: x0-x28, lr, sp
	 * and pc from the general-purpose state, then all 32 SIMD registers
	 * (full 128-bit values) from the NEON state.
	 */
	int i;
	arm_unified_thread_state_t *arch_state = (arm_unified_thread_state_t *) state;
	arm_neon_state64_t *arch_fpstate = (arm_neon_state64_t *) fpstate;

	for (i = 0; i < 29; ++i)
		context->regs [i] = arch_state->ts_64.__x [i];
	context->regs [ARMREG_R30] = arch_state->ts_64.__lr;
	context->regs [ARMREG_SP] = arch_state->ts_64.__sp;
	context->pc = arch_state->ts_64.__pc;

	for (i = 0; i < 32; ++i)
		context->fregs [i] = arch_fpstate->__v [i];
}
int
mono_mach_arch_get_thread_states (thread_port_t thread, thread_state_t state, mach_msg_type_number_t *count, thread_state_t fpstate, mach_msg_type_number_t *fpcount)
{
	/*
	 * Fetch both the general-purpose and the NEON/FP state of @thread.
	 * On return, *count/*fpcount hold the number of state words actually
	 * read. Returns the kern_return_t of the first failing call, or the
	 * result of the second thread_get_state () on success.
	 */
	arm_unified_thread_state_t *arch_state = (arm_unified_thread_state_t *) state;
	arm_neon_state64_t *arch_fpstate = (arm_neon_state64_t *) fpstate;
	kern_return_t ret;

	*count = ARM_UNIFIED_THREAD_STATE_COUNT;
	ret = thread_get_state (thread, ARM_UNIFIED_THREAD_STATE, (thread_state_t) arch_state, count);
	if (ret != KERN_SUCCESS)
		return ret;

	*fpcount = ARM_NEON_STATE64_COUNT;
	ret = thread_get_state (thread, ARM_NEON_STATE64, (thread_state_t) arch_fpstate, fpcount);
	return ret;
}
kern_return_t
mono_mach_arch_set_thread_states (thread_port_t thread, thread_state_t state, mach_msg_type_number_t count, thread_state_t fpstate, mach_msg_type_number_t fpcount)
{
	/*
	 * Install both the general-purpose and the NEON/FP state on @thread.
	 * Stops at the first failing thread_set_state () and returns its error;
	 * otherwise returns the result of setting the FP state.
	 */
	kern_return_t ret;

	ret = thread_set_state (thread, ARM_UNIFIED_THREAD_STATE, state, count);
	if (ret != KERN_SUCCESS)
		return ret;
	ret = thread_set_state (thread, ARM_NEON_STATE64, fpstate, fpcount);
	return ret;
}
void *
g_assert (fpctx->head.magic == FPSIMD_MAGIC);
for (i = 0; i < 32; ++i)
- /* Only store the bottom 8 bytes for now */
- *(guint64*)&(mctx->fregs [i]) = fpctx->vregs [i];
+ mctx->fregs [i] = fpctx->vregs [i];
#endif
/* FIXME: apple */
#endif
#define MONO_CONTEXT_OFFSET(field, index, field_type) \
"i" (offsetof (MonoContext, field) + (index) * sizeof (field_type))
/*
 * Per-target definition of one SIMD register slot in MonoContext.
 * On targets without a definition here, MonoContext falls back to
 * double-sized freg slots.
 */
#if defined(TARGET_X86)
#if defined(__APPLE__)
typedef struct __darwin_xmm_reg MonoContextSimdReg;
#endif
#elif defined(TARGET_AMD64)
#if defined(__APPLE__)
typedef struct __darwin_xmm_reg MonoContextSimdReg;
#elif defined(__linux__)
typedef struct _libc_xmmreg MonoContextSimdReg;
#endif
#elif defined(TARGET_ARM64)
typedef __uint128_t MonoContextSimdReg;
#endif
/*
* General notes about mono-context.
typedef struct {
mgreg_t gregs [AMD64_NREG];
-#ifdef __APPLE__
+#if defined(__APPLE__) || defined(__linux__)
MonoContextSimdReg fregs [AMD64_XMM_NREG];
#else
double fregs [AMD64_XMM_NREG];
typedef struct {
mgreg_t regs [32];
- double fregs [32];
+ /* FIXME not fully saved in trampolines */
+ MonoContextSimdReg fregs [32];
mgreg_t pc;
/*
* fregs might not be initialized if this context was created from a
/*
 * Capture the calling thread's own context via Mach: reads both the
 * general-purpose and the NEON/FP state with thread_get_state () and
 * converts them into @ctx with mono_mach_arch_thread_states_to_mono_context ().
 */
#define MONO_CONTEXT_GET_CURRENT(ctx) do { \
	arm_unified_thread_state_t thread_state; \
	arm_neon_state64_t thread_fpstate; \
	int state_flavor = ARM_UNIFIED_THREAD_STATE; \
	int fpstate_flavor = ARM_NEON_STATE64; \
	unsigned state_count = ARM_UNIFIED_THREAD_STATE_COUNT; \
	unsigned fpstate_count = ARM_NEON_STATE64_COUNT; \
	thread_port_t self = mach_thread_self (); \
	kern_return_t ret = thread_get_state (self, state_flavor, (thread_state_t) &thread_state, &state_count); \
	g_assert (ret == 0); \
	ret = thread_get_state (self, fpstate_flavor, (thread_state_t) &thread_fpstate, &fpstate_count); \
	g_assert (ret == 0); \
	mono_mach_arch_thread_states_to_mono_context ((thread_state_t) &thread_state, (thread_state_t) &thread_fpstate, &ctx); \
	mach_port_deallocate (current_task (), self); \
} while (0);
"stp x30, xzr, [x16], #8\n" \
"mov x30, sp\n" \
"str x30, [x16], #8\n" \
- "stp d0, d1, [x16], #16\n" \
- "stp d2, d3, [x16], #16\n" \
- "stp d4, d5, [x16], #16\n" \
- "stp d6, d7, [x16], #16\n" \
- "stp d8, d9, [x16], #16\n" \
- "stp d10, d11, [x16], #16\n" \
- "stp d12, d13, [x16], #16\n" \
- "stp d14, d15, [x16], #16\n" \
- "stp d16, d17, [x16], #16\n" \
- "stp d18, d19, [x16], #16\n" \
- "stp d20, d21, [x16], #16\n" \
- "stp d22, d23, [x16], #16\n" \
- "stp d24, d25, [x16], #16\n" \
- "stp d26, d27, [x16], #16\n" \
- "stp d28, d29, [x16], #16\n" \
- "stp d30, d31, [x16], #16\n" \
+ "stp q0, q1, [x16], #32\n" \
+ "stp q2, q3, [x16], #32\n" \
+ "stp q4, q5, [x16], #32\n" \
+ "stp q6, q7, [x16], #32\n" \
+ "stp q8, q9, [x16], #32\n" \
+ "stp q10, q11, [x16], #32\n" \
+ "stp q12, q13, [x16], #32\n" \
+ "stp q14, q15, [x16], #32\n" \
+ "stp q16, q17, [x16], #32\n" \
+ "stp q18, q19, [x16], #32\n" \
+ "stp q20, q21, [x16], #32\n" \
+ "stp q22, q23, [x16], #32\n" \
+ "stp q24, q25, [x16], #32\n" \
+ "stp q26, q27, [x16], #32\n" \
+ "stp q28, q29, [x16], #32\n" \
+ "stp q30, q31, [x16], #32\n" \
: \
: "r" (&ctx.regs) \
: "x16", "x30", "memory" \
#define UCONTEXT_REG_R15(ctx) (((ucontext_t*)(ctx))->sc_r15)
#elif !defined(HOST_WIN32)
#define UCONTEXT_GREGS(ctx) ((guint64*)&(((ucontext_t*)(ctx))->uc_mcontext.gregs))
+ #define UCONTEXT_FREGS(ctx) (((ucontext_t*)(ctx))->uc_mcontext.fpregs->_xmm)
#endif
#ifdef UCONTEXT_GREGS
#define UCONTEXT_REG_R15(ctx) (UCONTEXT_GREGS ((ctx)) [REG_R15])
#endif
/*
 * Per-XMM-register accessors over the signal ucontext's FP register area.
 * UCONTEXT_REG_XMM (no value) acts as a feature flag telling other code
 * that these accessors are available on this platform.
 */
#ifdef UCONTEXT_FREGS
#define UCONTEXT_REG_XMM
#define UCONTEXT_REG_XMM0(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM0])
#define UCONTEXT_REG_XMM1(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM1])
#define UCONTEXT_REG_XMM2(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM2])
#define UCONTEXT_REG_XMM3(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM3])
#define UCONTEXT_REG_XMM4(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM4])
#define UCONTEXT_REG_XMM5(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM5])
#define UCONTEXT_REG_XMM6(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM6])
#define UCONTEXT_REG_XMM7(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM7])
#define UCONTEXT_REG_XMM8(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM8])
#define UCONTEXT_REG_XMM9(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM9])
#define UCONTEXT_REG_XMM10(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM10])
#define UCONTEXT_REG_XMM11(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM11])
#define UCONTEXT_REG_XMM12(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM12])
#define UCONTEXT_REG_XMM13(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM13])
#define UCONTEXT_REG_XMM14(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM14])
#define UCONTEXT_REG_XMM15(ctx) (UCONTEXT_FREGS ((ctx)) [AMD64_XMM15])
#endif

#elif defined(__mono_ppc__)
#if HAVE_UCONTEXT_H