* Dietmar Maurer (dietmar@ximian.com)
* Patrik Torstensson
* Zoltan Varga (vargaz@gmail.com)
+ * Johan Lorensson (lateralusx.github@gmail.com)
*
* (C) 2003 Ximian, Inc.
* Copyright 2003-2011 Novell, Inc (http://www.novell.com)
#include <mono/utils/mono-mmap.h>
#include <mono/utils/mono-memory-model.h>
#include <mono/utils/mono-tls.h>
-#include <mono/utils/mono-hwcap-x86.h>
+#include <mono/utils/mono-hwcap.h>
#include <mono/utils/mono-threads.h>
#include "trace.h"
return class1;
}
-static int
-count_fields_nested (MonoClass *klass, gboolean pinvoke)
-{
- MonoMarshalType *info;
- int i, count;
-
- count = 0;
- if (pinvoke) {
- info = mono_marshal_load_type_info (klass);
- g_assert(info);
- for (i = 0; i < info->num_fields; ++i) {
- if (MONO_TYPE_ISSTRUCT (info->fields [i].field->type))
- count += count_fields_nested (mono_class_from_mono_type (info->fields [i].field->type), pinvoke);
- else
- count ++;
- }
- } else {
- gpointer iter;
- MonoClassField *field;
-
- iter = NULL;
- while ((field = mono_class_get_fields (klass, &iter))) {
- if (field->type->attrs & FIELD_ATTRIBUTE_STATIC)
- continue;
- if (MONO_TYPE_ISSTRUCT (field->type))
- count += count_fields_nested (mono_class_from_mono_type (field->type), pinvoke);
- else
- count ++;
- }
- }
- return count;
-}
-
typedef struct {
MonoType *type;
int size, offset;
*
* Collect field info from KLASS recursively into FIELDS.
*/
-static int
-collect_field_info_nested (MonoClass *klass, StructFieldInfo *fields, int index, int offset, gboolean pinvoke, gboolean unicode)
+static void
+collect_field_info_nested (MonoClass *klass, GArray *fields_array, int offset, gboolean pinvoke, gboolean unicode)
{
MonoMarshalType *info;
int i;
g_assert(info);
for (i = 0; i < info->num_fields; ++i) {
if (MONO_TYPE_ISSTRUCT (info->fields [i].field->type)) {
- index = collect_field_info_nested (mono_class_from_mono_type (info->fields [i].field->type), fields, index, info->fields [i].offset, pinvoke, unicode);
+ collect_field_info_nested (mono_class_from_mono_type (info->fields [i].field->type), fields_array, info->fields [i].offset, pinvoke, unicode);
} else {
guint32 align;
+ StructFieldInfo f;
- fields [index].type = info->fields [i].field->type;
- fields [index].size = mono_marshal_type_size (info->fields [i].field->type,
+ f.type = info->fields [i].field->type;
+ f.size = mono_marshal_type_size (info->fields [i].field->type,
info->fields [i].mspec,
&align, TRUE, unicode);
- fields [index].offset = offset + info->fields [i].offset;
- index ++;
+ f.offset = offset + info->fields [i].offset;
+ if (i == info->num_fields - 1 && f.size + f.offset < info->native_size) {
+ /* This can happen with .pack directives eg. 'fixed' arrays */
+ if (MONO_TYPE_IS_PRIMITIVE (f.type)) {
+ /* Replicate the last field to fill out the remaining place, since the code in add_valuetype () needs type information */
+ g_array_append_val (fields_array, f);
+ while (f.size + f.offset < info->native_size) {
+ f.offset += f.size;
+ g_array_append_val (fields_array, f);
+ }
+ } else {
+ f.size = info->native_size - f.offset;
+ g_array_append_val (fields_array, f);
+ }
+ } else {
+ g_array_append_val (fields_array, f);
+ }
}
}
} else {
if (field->type->attrs & FIELD_ATTRIBUTE_STATIC)
continue;
if (MONO_TYPE_ISSTRUCT (field->type)) {
- index = collect_field_info_nested (mono_class_from_mono_type (field->type), fields, index, field->offset - sizeof (MonoObject), pinvoke, unicode);
+ collect_field_info_nested (mono_class_from_mono_type (field->type), fields_array, field->offset - sizeof (MonoObject), pinvoke, unicode);
} else {
int align;
+ StructFieldInfo f;
- fields [index].type = field->type;
- fields [index].size = mono_type_size (field->type, &align);
- fields [index].offset = field->offset - sizeof (MonoObject) + offset;
- index ++;
+ f.type = field->type;
+ f.size = mono_type_size (field->type, &align);
+ f.offset = field->offset - sizeof (MonoObject) + offset;
+
+ g_array_append_val (fields_array, f);
}
}
}
- return index;
}
#ifdef TARGET_WIN32
-static void
-add_valuetype_win64 (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
- gboolean is_return,
- guint32 *gr, guint32 *fr, guint32 *stack_size)
-{
- guint32 size, i, nfields;
- guint32 argsize = 8;
- ArgumentClass arg_class;
- MonoMarshalType *info = NULL;
- StructFieldInfo *fields = NULL;
- MonoClass *klass;
- gboolean pass_on_stack = FALSE;
- klass = mono_class_from_mono_type (type);
- size = mini_type_stack_size_full (&klass->byval_arg, NULL, sig->pinvoke);
+/* Windows x64 ABI can pass/return value types in a register if their size is 1, 2, 4 or 8 bytes. */
+#define MONO_WIN64_VALUE_TYPE_FITS_REG(arg_size) (arg_size <= SIZEOF_REGISTER && (arg_size == 1 || arg_size == 2 || arg_size == 4 || arg_size == 8))
- /*
- * Standard C and C++ doesn't allow empty structs, empty structs will always have a size of 1 byte.
- * GCC have an extension to allow empty structs, https://gcc.gnu.org/onlinedocs/gcc/Empty-Structures.html.
- * This cause a little dilemma since runtime build using none GCC compiler will not be compatible with
- * GCC build C libraries and the other way around. On platforms where empty structs has size of 1 byte
- * it must be represented in call and cannot be dropped.
- */
- if (0 == size && MONO_TYPE_ISSTRUCT (type) && sig->pinvoke)
- ainfo->pass_empty_struct = TRUE;
-
- if (!sig->pinvoke)
- pass_on_stack = TRUE;
+static gboolean
+allocate_register_for_valuetype_win64 (ArgInfo *arg_info, ArgumentClass arg_class, guint32 arg_size, AMD64_Reg_No int_regs [], int int_reg_count, AMD64_XMM_Reg_No float_regs [], int float_reg_count, guint32 *current_int_reg, guint32 *current_float_reg)
+{
+ gboolean result = FALSE;
- /* If this struct can't be split up naturally into 8-byte */
- /* chunks (registers), pass it on the stack. */
- if (sig->pinvoke && !pass_on_stack) {
- guint32 align;
- guint32 field_size;
+ assert (arg_info != NULL && int_regs != NULL && float_regs != NULL && current_int_reg != NULL && current_float_reg != NULL);
+ assert (arg_info->storage == ArgValuetypeInReg || arg_info->storage == ArgValuetypeAddrInIReg);
- info = mono_marshal_load_type_info (klass);
- g_assert (info);
+ arg_info->pair_storage [0] = arg_info->pair_storage [1] = ArgNone;
+ arg_info->pair_regs [0] = arg_info->pair_regs [1] = ArgNone;
+ arg_info->pair_size [0] = 0;
+ arg_info->pair_size [1] = 0;
+ arg_info->nregs = 0;
- /*
- * Collect field information recursively to be able to
- * handle nested structures.
- */
- nfields = count_fields_nested (klass, sig->pinvoke);
- fields = g_new0 (StructFieldInfo, nfields);
- collect_field_info_nested (klass, fields, 0, 0, sig->pinvoke, klass->unicode);
+ if (arg_class == ARG_CLASS_INTEGER && *current_int_reg < int_reg_count) {
+ /* Pass parameter in integer register. */
+ arg_info->pair_storage [0] = ArgInIReg;
+ arg_info->pair_regs [0] = int_regs [*current_int_reg];
+ (*current_int_reg) ++;
+ result = TRUE;
+ } else if (arg_class == ARG_CLASS_SSE && *current_float_reg < float_reg_count) {
+ /* Pass parameter in float register. */
+ arg_info->pair_storage [0] = (arg_size <= sizeof (gfloat)) ? ArgInFloatSSEReg : ArgInDoubleSSEReg;
+ arg_info->pair_regs [0] = float_regs [*current_float_reg];
+ (*current_float_reg) ++;
+ result = TRUE;
+ }
- for (i = 0; i < nfields; ++i) {
- if ((fields [i].offset < 8) && (fields [i].offset + fields [i].size) > 8) {
- pass_on_stack = TRUE;
- break;
- }
- }
+ if (result == TRUE) {
+ arg_info->pair_size [0] = arg_size;
+ arg_info->nregs = 1;
}
- if (pass_on_stack) {
- /* Allways pass in memory */
- ainfo->offset = *stack_size;
- *stack_size += ALIGN_TO (size, 8);
- ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
- if (!is_return)
- ainfo->arg_size = ALIGN_TO (size, 8);
+ return result;
+}
- g_free (fields);
- return;
- }
+static inline gboolean
+allocate_parameter_register_for_valuetype_win64 (ArgInfo *arg_info, ArgumentClass arg_class, guint32 arg_size, guint32 *current_int_reg, guint32 *current_float_reg)
+{
+ return allocate_register_for_valuetype_win64 (arg_info, arg_class, arg_size, param_regs, PARAM_REGS, float_param_regs, FLOAT_PARAM_REGS, current_int_reg, current_float_reg);
+}
- if (!sig->pinvoke) {
- int n = mono_class_value_size (klass, NULL);
+static inline gboolean
+allocate_return_register_for_valuetype_win64 (ArgInfo *arg_info, ArgumentClass arg_class, guint32 arg_size, guint32 *current_int_reg, guint32 *current_float_reg)
+{
+ return allocate_register_for_valuetype_win64 (arg_info, arg_class, arg_size, return_regs, RETURN_REGS, float_return_regs, FLOAT_RETURN_REGS, current_int_reg, current_float_reg);
+}
- argsize = n;
+static void
+allocate_storage_for_valuetype_win64 (ArgInfo *arg_info, MonoType *type, gboolean is_return, ArgumentClass arg_class,
+ guint32 arg_size, guint32 *current_int_reg, guint32 *current_float_reg, guint32 *stack_size)
+{
+ /* Windows x64 value type ABI.
+ *
+ * Parameters: https://msdn.microsoft.com/en-us/library/zthk2dkh.aspx
+ *
+ * Integer/Float types smaller than or equal to 8 bytes or properly sized struct/union (1,2,4,8)
+ * Try pass in register using ArgValuetypeInReg/(ArgInIReg|ArgInFloatSSEReg|ArgInDoubleSSEReg) as storage and size of parameter(1,2,4,8), if no more registers, pass on stack using ArgOnStack as storage and size of parameter(1,2,4,8).
+ * Integer/Float types bigger than 8 bytes or struct/unions larger than 8 bytes or (3,5,6,7).
+ * Try to pass pointer in register using ArgValuetypeAddrInIReg, if no more registers, pass pointer on stack using ArgValuetypeAddrOnStack as storage and parameter size of register (8 bytes).
+ *
+ * Return values: https://msdn.microsoft.com/en-us/library/7572ztz4.aspx.
+ *
+ * Integers/Float types smaller than or equal to 8 bytes
+ * Return in corresponding register RAX/XMM0 using ArgValuetypeInReg/(ArgInIReg|ArgInFloatSSEReg|ArgInDoubleSSEReg) as storage and size of parameter(1,2,4,8).
+ * Properly sized struct/unions (1,2,4,8)
+ * Return in register RAX using ArgValuetypeInReg as storage and size of parameter(1,2,4,8).
+ * Types bigger than 8 bytes or struct/unions larger than 8 bytes or (3,5,6,7).
+ * Return pointer to allocated stack space (allocated by caller) using ArgValuetypeAddrInIReg as storage and parameter size.
+ */
- if (n > 8)
- arg_class = ARG_CLASS_MEMORY;
- else
- /* Always pass in 1 integer register */
- arg_class = ARG_CLASS_INTEGER;
- } else {
- g_assert (info);
+ assert (arg_info != NULL && type != NULL && current_int_reg != NULL && current_float_reg != NULL && stack_size != NULL);
- /*Only drop value type if its not an empty struct as input that must be represented in call*/
- if ((!fields && !ainfo->pass_empty_struct) || (!fields && ainfo->pass_empty_struct && is_return)) {
- ainfo->storage = ArgValuetypeInReg;
- ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
- return;
- }
+ if (!is_return) {
- switch (info->native_size) {
- case 0:
- g_assert (!fields && MONO_TYPE_ISSTRUCT (type) && !is_return);
- break;
- case 1: case 2: case 4: case 8:
- break;
- default:
- if (is_return) {
- ainfo->storage = ArgValuetypeAddrInIReg;
- ainfo->offset = *stack_size;
- *stack_size += ALIGN_TO (info->native_size, 8);
- }
- else {
- ainfo->storage = ArgValuetypeAddrInIReg;
+ /* Parameter cases. */
+ if (arg_class != ARG_CLASS_MEMORY && MONO_WIN64_VALUE_TYPE_FITS_REG (arg_size)) {
+ assert (arg_size == 1 || arg_size == 2 || arg_size == 4 || arg_size == 8);
- if (*gr < PARAM_REGS) {
- ainfo->pair_storage [0] = ArgInIReg;
- ainfo->pair_regs [0] = param_regs [*gr];
- (*gr) ++;
- }
- else {
- ainfo->pair_storage [0] = ArgOnStack;
- ainfo->offset = *stack_size;
- ainfo->arg_size = sizeof (mgreg_t);
- *stack_size += 8;
- }
+ /* First, try to use registers for parameter. If type is struct it can only be passed by value in integer register. */
+ arg_info->storage = ArgValuetypeInReg;
+ if (!allocate_parameter_register_for_valuetype_win64 (arg_info, !MONO_TYPE_ISSTRUCT (type) ? arg_class : ARG_CLASS_INTEGER, arg_size, current_int_reg, current_float_reg)) {
+ /* No more registers, fallback passing parameter on stack as value. */
+ assert (arg_info->pair_storage [0] == ArgNone && arg_info->pair_storage [1] == ArgNone && arg_info->pair_size [0] == 0 && arg_info->pair_size [1] == 0 && arg_info->nregs == 0);
+
+ /* Passing value directly on stack, so use size of value. */
+ arg_info->storage = ArgOnStack;
+ arg_size = ALIGN_TO (arg_size, sizeof (mgreg_t));
+ arg_info->offset = *stack_size;
+ arg_info->arg_size = arg_size;
+ *stack_size += arg_size;
+ }
+ } else {
+ /* Fallback to stack, try to pass address to parameter in register. Always use integer register to represent stack address. */
+ arg_info->storage = ArgValuetypeAddrInIReg;
+ if (!allocate_parameter_register_for_valuetype_win64 (arg_info, ARG_CLASS_INTEGER, arg_size, current_int_reg, current_float_reg)) {
+ /* No more registers, fallback passing address to parameter on stack. */
+ assert (arg_info->pair_storage [0] == ArgNone && arg_info->pair_storage [1] == ArgNone && arg_info->pair_size [0] == 0 && arg_info->pair_size [1] == 0 && arg_info->nregs == 0);
+
+ /* Passing an address to value on stack, so use size of register as argument size. */
+ arg_info->storage = ArgValuetypeAddrOnStack;
+ arg_size = sizeof (mgreg_t);
+ arg_info->offset = *stack_size;
+ arg_info->arg_size = arg_size;
+ *stack_size += arg_size;
}
-
- g_free (fields);
- return;
}
+ } else {
+ /* Return value cases. */
+ if (arg_class != ARG_CLASS_MEMORY && MONO_WIN64_VALUE_TYPE_FITS_REG (arg_size)) {
+ assert (arg_size == 1 || arg_size == 2 || arg_size == 4 || arg_size == 8);
- int size;
- guint32 align;
- ArgumentClass class1;
+ /* Return value fits into return registers. If type is struct it can only be returned by value in integer register. */
+ arg_info->storage = ArgValuetypeInReg;
+ allocate_return_register_for_valuetype_win64 (arg_info, !MONO_TYPE_ISSTRUCT (type) ? arg_class : ARG_CLASS_INTEGER, arg_size, current_int_reg, current_float_reg);
- if (nfields == 0 && ainfo->pass_empty_struct) {
- g_assert (!fields && !is_return);
- class1 = ARG_CLASS_INTEGER;
- }
- else if (nfields == 0)
- class1 = ARG_CLASS_MEMORY;
- else
- class1 = ARG_CLASS_NO_CLASS;
- for (i = 0; i < nfields; ++i) {
- /* How far into this quad this data extends.*/
- /* (8 is size of quad) */
- argsize = fields [i].offset + fields [i].size;
+ /* Only RAX/XMM0 should be used to return valuetype. */
+ assert ((arg_info->pair_regs[0] == AMD64_RAX && arg_info->pair_regs[1] == ArgNone) || (arg_info->pair_regs[0] == AMD64_XMM0 && arg_info->pair_regs[1] == ArgNone));
+ } else {
+ /* Return value doesn't fit into return register, return address to allocated stack space (allocated by caller and passed as input). */
+ arg_info->storage = ArgValuetypeAddrInIReg;
+ allocate_return_register_for_valuetype_win64 (arg_info, ARG_CLASS_INTEGER, arg_size, current_int_reg, current_float_reg);
- class1 = merge_argument_class_from_type (fields [i].type, class1);
+ /* Only RAX should be used to return valuetype address. */
+ assert (arg_info->pair_regs[0] == AMD64_RAX && arg_info->pair_regs[1] == ArgNone);
+
+ arg_size = ALIGN_TO (arg_size, sizeof (mgreg_t));
+ arg_info->offset = *stack_size;
+ *stack_size += arg_size;
}
- g_assert (class1 != ARG_CLASS_NO_CLASS);
- arg_class = class1;
}
+}
- g_free (fields);
+static void
+get_valuetype_size_win64 (MonoClass *klass, gboolean pinvoke, ArgInfo *arg_info, MonoType *type, ArgumentClass *arg_class, guint32 *arg_size)
+{
+ *arg_size = 0;
+ *arg_class = ARG_CLASS_NO_CLASS;
- /* Allocate registers */
- {
- int orig_gr = *gr;
- int orig_fr = *fr;
+ assert (klass != NULL && arg_info != NULL && type != NULL && arg_class != NULL && arg_size != NULL);
+
+ if (pinvoke) {
+ /* Calculate argument class type and size of marshalled type. */
+ MonoMarshalType *info = mono_marshal_load_type_info (klass);
+ *arg_size = info->native_size;
+ } else {
+ /* Calculate argument class type and size of managed type. */
+ *arg_size = mono_class_value_size (klass, NULL);
+ }
- while (argsize != 1 && argsize != 2 && argsize != 4 && argsize != 8)
- argsize ++;
+ /* Windows ABI only handles value types on the stack or passed in an integer register (if the value fits the register size). */
+ *arg_class = MONO_WIN64_VALUE_TYPE_FITS_REG (*arg_size) ? ARG_CLASS_INTEGER : ARG_CLASS_MEMORY;
- ainfo->storage = ArgValuetypeInReg;
- ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
- ainfo->pair_size [0] = argsize;
- ainfo->pair_size [1] = 0;
- ainfo->nregs = 1;
- switch (arg_class) {
- case ARG_CLASS_INTEGER:
- if (*gr >= PARAM_REGS)
- arg_class = ARG_CLASS_MEMORY;
- else {
- ainfo->pair_storage [0] = ArgInIReg;
- if (is_return)
- ainfo->pair_regs [0] = return_regs [*gr];
- else
- ainfo->pair_regs [0] = param_regs [*gr];
- (*gr) ++;
- }
- break;
- case ARG_CLASS_SSE:
- if (*fr >= FLOAT_PARAM_REGS)
- arg_class = ARG_CLASS_MEMORY;
- else {
- if (argsize <= 4)
- ainfo->pair_storage [0] = ArgInFloatSSEReg;
- else
- ainfo->pair_storage [0] = ArgInDoubleSSEReg;
- ainfo->pair_regs [0] = *fr;
- (*fr) ++;
- }
- break;
- case ARG_CLASS_MEMORY:
- break;
- default:
- g_assert_not_reached ();
- }
+ if (*arg_class == ARG_CLASS_MEMORY) {
+ /* Value type has a size that doesn't seem to fit a register according to the ABI. Try to use the full stack size of the type. */
+ *arg_size = mini_type_stack_size_full (&klass->byval_arg, NULL, pinvoke);
+ }
- if (arg_class == ARG_CLASS_MEMORY) {
- /* Revert possible register assignments */
- *gr = orig_gr;
- *fr = orig_fr;
+ /*
+ * Standard C and C++ don't allow empty structs; empty structs will always have a size of 1 byte.
+ * GCC has an extension to allow empty structs, https://gcc.gnu.org/onlinedocs/gcc/Empty-Structures.html.
+ * This causes a dilemma, since a runtime built with a non-GCC compiler will not be compatible with
+ * GCC-built C libraries and vice versa. On platforms where empty structs have a size of 1 byte
+ * they must be represented in the call and cannot be dropped.
+ */
+ if (*arg_size == 0 && MONO_TYPE_ISSTRUCT (type)) {
+ arg_info->pass_empty_struct = TRUE;
+ *arg_size = SIZEOF_REGISTER;
+ *arg_class = ARG_CLASS_INTEGER;
+ }
- ainfo->offset = *stack_size;
- *stack_size += sizeof (mgreg_t);
- ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
- if (!is_return)
- ainfo->arg_size = sizeof (mgreg_t);
- }
+ assert (*arg_class != ARG_CLASS_NO_CLASS);
+}
+
+static void
+add_valuetype_win64 (MonoMethodSignature *signature, ArgInfo *arg_info, MonoType *type,
+ gboolean is_return, guint32 *current_int_reg, guint32 *current_float_reg, guint32 *stack_size)
+{
+ guint32 arg_size = SIZEOF_REGISTER;
+ MonoClass *klass = NULL;
+ ArgumentClass arg_class;
+
+ assert (signature != NULL && arg_info != NULL && type != NULL && current_int_reg != NULL && current_float_reg != NULL && stack_size != NULL);
+
+ klass = mono_class_from_mono_type (type);
+ get_valuetype_size_win64 (klass, signature->pinvoke, arg_info, type, &arg_class, &arg_size);
+
+ /* Only drop value type if its not an empty struct as input that must be represented in call */
+ if ((arg_size == 0 && !arg_info->pass_empty_struct) || (arg_size == 0 && arg_info->pass_empty_struct && is_return)) {
+ arg_info->storage = ArgValuetypeInReg;
+ arg_info->pair_storage [0] = arg_info->pair_storage [1] = ArgNone;
+ } else {
+ /* Allocate storage for value type. */
+ allocate_storage_for_valuetype_win64 (arg_info, type, is_return, arg_class, arg_size, current_int_reg, current_float_reg, stack_size);
}
}
+
#endif /* TARGET_WIN32 */
static void
guint32 quadsize [2] = {8, 8};
ArgumentClass args [2];
StructFieldInfo *fields = NULL;
+ GArray *fields_array;
MonoClass *klass;
gboolean pass_on_stack = FALSE;
int struct_size;
* Collect field information recursively to be able to
* handle nested structures.
*/
- nfields = count_fields_nested (klass, sig->pinvoke);
- fields = g_new0 (StructFieldInfo, nfields);
- collect_field_info_nested (klass, fields, 0, 0, sig->pinvoke, klass->unicode);
+ fields_array = g_array_new (FALSE, TRUE, sizeof (StructFieldInfo));
+ collect_field_info_nested (klass, fields_array, 0, sig->pinvoke, klass->unicode);
+ fields = (StructFieldInfo*)fields_array->data;
+ nfields = fields_array->len;
for (i = 0; i < nfields; ++i) {
if ((fields [i].offset < 8) && (fields [i].offset + fields [i].size) > 8) {
if (!is_return)
ainfo->arg_size = ALIGN_TO (size, 8);
- g_free (fields);
+ g_array_free (fields_array, TRUE);
return;
}
if (!is_return)
ainfo->arg_size = ALIGN_TO (struct_size, 8);
- g_free (fields);
+ g_array_free (fields_array, TRUE);
return;
}
}
}
- g_free (fields);
+ g_array_free (fields_array, TRUE);
/* Post merger cleanup */
if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY))
mono_aot_register_jit_icall ("mono_amd64_throw_corlib_exception", mono_amd64_throw_corlib_exception);
mono_aot_register_jit_icall ("mono_amd64_resume_unwind", mono_amd64_resume_unwind);
mono_aot_register_jit_icall ("mono_amd64_get_original_ip", mono_amd64_get_original_ip);
+ mono_aot_register_jit_icall ("mono_amd64_handler_block_trampoline_helper", mono_amd64_handler_block_trampoline_helper);
+
#if defined(MONO_ARCH_GSHAREDVT_SUPPORTED)
mono_aot_register_jit_icall ("mono_amd64_start_gsharedvt_call", mono_amd64_start_gsharedvt_call);
#endif
*exclude_mask |= MONO_OPT_CMOV;
}
+#ifdef TARGET_WIN32
+ /* The current SIMD doesn't support the argument used by a LD_ADDR to be of type OP_VTARG_ADDR. */
+ /* This will now be used for value types > 8 or of size 3,5,6,7 as dictated by windows x64 value type ABI. */
+ /* Since OP_VTARG_ADDR needs to be resolved in mono_spill_global_vars and the SIMD implementation optimizes */
+ /* away the LD_ADDR in load_simd_vreg, this will cause an error in mono_spill_global_vars since the incorrect opcode */
+ /* will now have a reference to an argument that won't be fully decomposed. */
+ *exclude_mask |= MONO_OPT_SIMD;
+#endif
+
return opts;
}
for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
ArgInfo *ainfo = &cinfo->args [i];
- if (ainfo->storage == ArgOnStack) {
+ if (ainfo->storage == ArgOnStack || ainfo->storage == ArgValuetypeAddrInIReg || ainfo->storage == ArgValuetypeAddrOnStack) {
/*
* The stack offset can only be determined when the frame
* size is known.
break;
case ArgValuetypeInReg:
break;
- case ArgValuetypeAddrInIReg: {
+ case ArgValuetypeAddrInIReg:
+ case ArgValuetypeAddrOnStack: {
MonoInst *indir;
g_assert (!cfg->arch.omit_fp);
-
+ g_assert (ainfo->storage == ArgValuetypeAddrInIReg || (ainfo->storage == ArgValuetypeAddrOnStack && ainfo->pair_storage [0] == ArgNone));
MONO_INST_NEW (cfg, indir, 0);
+
indir->opcode = OP_REGOFFSET;
if (ainfo->pair_storage [0] == ArgInIReg) {
indir->inst_basereg = cfg->frame_reg;
NOT_IMPLEMENTED;
}
- if (!inreg && (ainfo->storage != ArgOnStack) && (ainfo->storage != ArgValuetypeAddrInIReg) && (ainfo->storage != ArgGSharedVtOnStack)) {
+ if (!inreg && (ainfo->storage != ArgOnStack) && (ainfo->storage != ArgValuetypeAddrInIReg) && (ainfo->storage != ArgValuetypeAddrOnStack) && (ainfo->storage != ArgGSharedVtOnStack)) {
ins->opcode = OP_REGOFFSET;
ins->inst_basereg = cfg->frame_reg;
/* These arguments are saved to the stack in the prolog */
if (cfg->method->save_lmf) {
cfg->lmf_ir = TRUE;
#if !defined(TARGET_WIN32)
- if (mono_get_lmf_tls_offset () != -1 && !optimize_for_xen)
+ if (!optimize_for_xen)
cfg->lmf_ir_mono_lmf = TRUE;
#endif
}
case ArgOnStack:
case ArgValuetypeInReg:
case ArgValuetypeAddrInIReg:
+ case ArgValuetypeAddrOnStack:
case ArgGSharedVtInReg:
case ArgGSharedVtOnStack: {
if (ainfo->storage == ArgOnStack && !MONO_TYPE_ISSTRUCT (t) && !call->tail_call)
}
break;
}
- case ArgValuetypeAddrInIReg: {
+ case ArgValuetypeAddrInIReg:
+ case ArgValuetypeAddrOnStack: {
MonoInst *vtaddr, *load;
+
+ g_assert (ainfo->storage == ArgValuetypeAddrInIReg || (ainfo->storage == ArgValuetypeAddrOnStack && ainfo->pair_storage [0] == ArgNone));
+
vtaddr = mono_compile_create_var (cfg, &ins->klass->byval_arg, OP_LOCAL);
MONO_INST_NEW (cfg, load, OP_LDADDR);
#endif /* DISABLE_JIT */
-#ifdef __APPLE__
+#ifdef TARGET_MACH
static int tls_gs_offset;
#endif
gboolean
-mono_amd64_have_tls_get (void)
+mono_arch_have_fast_tls (void)
{
#ifdef TARGET_MACH
- static gboolean have_tls_get = FALSE;
+ static gboolean have_fast_tls = FALSE;
static gboolean inited = FALSE;
+ guint8 *ins;
+
+ if (mini_get_debug_options ()->use_fallback_tls)
+ return FALSE;
if (inited)
- return have_tls_get;
+ return have_fast_tls;
-#if MONO_HAVE_FAST_TLS
- guint8 *ins = (guint8*)pthread_getspecific;
+ ins = (guint8*)pthread_getspecific;
/*
* We're looking for these two instructions:
* mov %gs:[offset](,%rdi,8),%rax
* retq
*/
- have_tls_get = ins [0] == 0x65 &&
+ have_fast_tls = ins [0] == 0x65 &&
ins [1] == 0x48 &&
ins [2] == 0x8b &&
ins [3] == 0x04 &&
* popq %rbp
* retq
*/
- if (!have_tls_get) {
- have_tls_get = ins [0] == 0x55 &&
+ if (!have_fast_tls) {
+ have_fast_tls = ins [0] == 0x55 &&
ins [1] == 0x48 &&
ins [2] == 0x89 &&
ins [3] == 0xe5 &&
tls_gs_offset = ins[9];
}
-#endif
-
inited = TRUE;
- return have_tls_get;
+ return have_fast_tls;
#elif defined(TARGET_ANDROID)
return FALSE;
#else
+ if (mini_get_debug_options ()->use_fallback_tls)
+ return FALSE;
return TRUE;
#endif
}
*
* Returns: a pointer to the end of the stored code
*/
-guint8*
+static guint8*
mono_amd64_emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef TARGET_WIN32
amd64_mov_reg_membase (code, dreg, dreg, (tls_offset * 8) - 0x200, 8);
amd64_patch (buf [0], code);
}
-#elif defined(__APPLE__)
+#elif defined(TARGET_MACH)
x86_prefix (code, X86_GS_PREFIX);
amd64_mov_reg_mem (code, dreg, tls_gs_offset + (tls_offset * 8), 8);
#else
return code;
}
-#ifdef TARGET_WIN32
-
-#define MAX_TEB_TLS_SLOTS 64
-#define TEB_TLS_SLOTS_OFFSET 0x1480
-#define TEB_TLS_EXPANSION_SLOTS_OFFSET 0x1780
-
-static guint8*
-emit_tls_get_reg_windows (guint8* code, int dreg, int offset_reg)
-{
- int tmp_reg = -1;
- guint8 * more_than_64_slots = NULL;
- guint8 * empty_slot = NULL;
- guint8 * tls_get_reg_done = NULL;
-
- //Use temporary register for offset calculation?
- if (dreg == offset_reg) {
- tmp_reg = dreg == AMD64_RAX ? AMD64_RCX : AMD64_RAX;
- amd64_push_reg (code, tmp_reg);
- amd64_mov_reg_reg (code, tmp_reg, offset_reg, sizeof (gpointer));
- offset_reg = tmp_reg;
- }
-
- //TEB TLS slot array only contains MAX_TEB_TLS_SLOTS items, if more is used the expansion slots must be addressed.
- amd64_alu_reg_imm (code, X86_CMP, offset_reg, MAX_TEB_TLS_SLOTS);
- more_than_64_slots = code;
- amd64_branch8 (code, X86_CC_GE, 0, TRUE);
-
- //TLS slot array, _TEB.TlsSlots, is at offset TEB_TLS_SLOTS_OFFSET and index is offset * 8 in Windows 64-bit _TEB structure.
- amd64_shift_reg_imm (code, X86_SHL, offset_reg, 3);
- amd64_alu_reg_imm (code, X86_ADD, offset_reg, TEB_TLS_SLOTS_OFFSET);
-
- //TEB pointer is stored in GS segment register on Windows x64. TLS slot is located at calculated offset from that pointer.
- x86_prefix (code, X86_GS_PREFIX);
- amd64_mov_reg_membase (code, dreg, offset_reg, 0, sizeof (gpointer));
-
- tls_get_reg_done = code;
- amd64_jump8 (code, 0);
-
- amd64_patch (more_than_64_slots, code);
-
- //TLS expansion slots, _TEB.TlsExpansionSlots, is at offset TEB_TLS_EXPANSION_SLOTS_OFFSET in Windows 64-bit _TEB structure.
- x86_prefix (code, X86_GS_PREFIX);
- amd64_mov_reg_mem (code, dreg, TEB_TLS_EXPANSION_SLOTS_OFFSET, sizeof (gpointer));
-
- //Check for NULL in _TEB.TlsExpansionSlots.
- amd64_test_reg_reg (code, dreg, dreg);
- empty_slot = code;
- amd64_branch8 (code, X86_CC_EQ, 0, TRUE);
-
- //TLS expansion slots are at index offset into the expansion array.
- //Calculate for the MAX_TEB_TLS_SLOTS offsets, since the interessting offset is offset_reg - MAX_TEB_TLS_SLOTS.
- amd64_alu_reg_imm (code, X86_SUB, offset_reg, MAX_TEB_TLS_SLOTS);
- amd64_shift_reg_imm (code, X86_SHL, offset_reg, 3);
-
- amd64_mov_reg_memindex (code, dreg, dreg, 0, offset_reg, 0, sizeof (gpointer));
-
- amd64_patch (empty_slot, code);
- amd64_patch (tls_get_reg_done, code);
-
- if (tmp_reg != -1)
- amd64_pop_reg (code, tmp_reg);
-
- return code;
-}
-
-#endif
-
-static guint8*
-emit_tls_get_reg (guint8* code, int dreg, int offset_reg)
-{
- /* offset_reg contains a value translated by mono_arch_translate_tls_offset () */
-#ifdef TARGET_OSX
- if (dreg != offset_reg)
- amd64_mov_reg_reg (code, dreg, offset_reg, sizeof (mgreg_t));
- amd64_prefix (code, X86_GS_PREFIX);
- amd64_mov_reg_membase (code, dreg, dreg, 0, sizeof (mgreg_t));
-#elif defined(__linux__)
- int tmpreg = -1;
-
- if (dreg == offset_reg) {
- /* Use a temporary reg by saving it to the redzone */
- tmpreg = dreg == AMD64_RAX ? AMD64_RCX : AMD64_RAX;
- amd64_mov_membase_reg (code, AMD64_RSP, -8, tmpreg, 8);
- amd64_mov_reg_reg (code, tmpreg, offset_reg, sizeof (gpointer));
- offset_reg = tmpreg;
- }
- x86_prefix (code, X86_FS_PREFIX);
- amd64_mov_reg_mem (code, dreg, 0, 8);
- amd64_mov_reg_memindex (code, dreg, dreg, 0, offset_reg, 0, 8);
- if (tmpreg != -1)
- amd64_mov_reg_membase (code, tmpreg, AMD64_RSP, -8, 8);
-#elif defined(TARGET_WIN32)
- code = emit_tls_get_reg_windows (code, dreg, offset_reg);
-#else
- g_assert_not_reached ();
-#endif
- return code;
-}
-
static guint8*
-amd64_emit_tls_set (guint8 *code, int sreg, int tls_offset)
+mono_amd64_emit_tls_set (guint8 *code, int sreg, int tls_offset)
{
#ifdef TARGET_WIN32
g_assert_not_reached ();
-#elif defined(__APPLE__)
+#elif defined(TARGET_MACH)
x86_prefix (code, X86_GS_PREFIX);
amd64_mov_mem_reg (code, tls_gs_offset + (tls_offset * 8), sreg, 8);
#else
return code;
}
-static guint8*
-amd64_emit_tls_set_reg (guint8 *code, int sreg, int offset_reg)
-{
- /* offset_reg contains a value translated by mono_arch_translate_tls_offset () */
-#ifdef TARGET_WIN32
- g_assert_not_reached ();
-#elif defined(__APPLE__)
- x86_prefix (code, X86_GS_PREFIX);
- amd64_mov_membase_reg (code, offset_reg, 0, sreg, 8);
-#else
- x86_prefix (code, X86_FS_PREFIX);
- amd64_mov_membase_reg (code, offset_reg, 0, sreg, 8);
-#endif
- return code;
-}
-
- /*
- * mono_arch_translate_tls_offset:
- *
- * Translate the TLS offset OFFSET computed by MONO_THREAD_VAR_OFFSET () into a format usable by OP_TLS_GET_REG/OP_TLS_SET_REG.
- */
-int
-mono_arch_translate_tls_offset (int offset)
-{
-#ifdef __APPLE__
- return tls_gs_offset + (offset * 8);
-#else
- return offset;
-#endif
-}
-
/*
* emit_setup_lmf:
*
return code;
}
+#ifdef TARGET_WIN32
+
+#define TEB_LAST_ERROR_OFFSET 0x068
+
+static guint8*
+emit_get_last_error (guint8* code, int dreg)
+{
+ /* The thread's last error value is located at TEB_LAST_ERROR_OFFSET. */
+ x86_prefix (code, X86_GS_PREFIX);
+ amd64_mov_reg_membase (code, dreg, TEB_LAST_ERROR_OFFSET, 0, sizeof (guint32));
+
+ return code;
+}
+
+#else
+
+static guint8*
+emit_get_last_error (guint8* code, int dreg)
+{
+ g_assert_not_reached ();
+}
+
+#endif
+
/* benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
break;
}
case OP_GENERIC_CLASS_INIT: {
- static int byte_offset = -1;
- static guint8 bitmask;
guint8 *jump;
g_assert (ins->sreg1 == MONO_AMD64_ARG_REG1);
- if (byte_offset < 0)
- mono_marshal_find_bitfield_offset (MonoVTable, initialized, &byte_offset, &bitmask);
-
- amd64_test_membase_imm_size (code, ins->sreg1, byte_offset, bitmask, 1);
+ amd64_test_membase_imm_size (code, ins->sreg1, MONO_STRUCT_OFFSET (MonoVTable, initialized), 1, 1);
jump = code;
amd64_branch8 (code, X86_CC_NZ, -1, 1);
code = mono_amd64_emit_tls_get (code, ins->dreg, ins->inst_offset);
break;
}
- case OP_TLS_GET_REG:
- code = emit_tls_get_reg (code, ins->dreg, ins->sreg1);
- break;
case OP_TLS_SET: {
- code = amd64_emit_tls_set (code, ins->sreg1, ins->inst_offset);
- break;
- }
- case OP_TLS_SET_REG: {
- code = amd64_emit_tls_set_reg (code, ins->sreg1, ins->sreg2);
+ code = mono_amd64_emit_tls_set (code, ins->sreg1, ins->inst_offset);
break;
}
case OP_MEMORY_BARRIER: {
case OP_XZERO:
amd64_sse_pxor_reg_reg (code, ins->dreg, ins->dreg);
break;
+ case OP_XONES:
+ amd64_sse_pcmpeqb_reg_reg (code, ins->dreg, ins->dreg);
+ break;
case OP_ICONV_TO_R4_RAW:
amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
+ if (!cfg->r4fp)
+ amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
break;
case OP_FCONV_TO_R8_X:
ins->backend.pc_offset = code - cfg->native_code;
bb->spill_slot_defs = g_slist_prepend_mempool (cfg->mempool, bb->spill_slot_defs, ins);
break;
+ case OP_GET_LAST_ERROR:
+ emit_get_last_error(code, ins->dreg);
+ break;
default:
g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
g_assert_not_reached ();
if (ainfo->pair_storage [0] == ArgInIReg)
amd64_mov_membase_reg (code, ins->inst_left->inst_basereg, ins->inst_left->inst_offset, ainfo->pair_regs [0], sizeof (gpointer));
break;
+ case ArgValuetypeAddrOnStack:
+ break;
case ArgGSharedVtInReg:
amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg, 8);
break;
if (method->save_lmf) {
/* check if we need to restore protection of the stack after a stack overflow */
- if (!cfg->compile_aot && mono_get_jit_tls_offset () != -1) {
+ if (!cfg->compile_aot && mono_arch_have_fast_tls () && mono_tls_get_tls_offset (TLS_KEY_JIT_TLS) != -1) {
guint8 *patch;
- code = mono_amd64_emit_tls_get (code, AMD64_RCX, mono_get_jit_tls_offset ());
+ code = mono_amd64_emit_tls_get (code, AMD64_RCX, mono_tls_get_tls_offset (TLS_KEY_JIT_TLS));
/* we load the value in a separate instruction: this mechanism may be
* used later as a safer way to do thread interruption
*/
amd64_jump_membase (code, AMD64_RAX, offset);
mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_DELEGATE_INVOKE, NULL);
- if (load_imt_reg)
- tramp_name = g_strdup_printf ("delegate_virtual_invoke_imt_%d", - offset / sizeof (gpointer));
- else
- tramp_name = g_strdup_printf ("delegate_virtual_invoke_%d", offset / sizeof (gpointer));
+ tramp_name = mono_get_delegate_virtual_invoke_impl_name (load_imt_reg, offset);
*info = mono_tramp_info_create (tramp_name, start, code - start, NULL, unwind_ops);
g_free (tramp_name);
res = g_slist_prepend (res, info);
}
- for (i = 0; i <= MAX_VIRTUAL_DELEGATE_OFFSET; ++i) {
+ for (i = 1; i <= MONO_IMT_SIZE; ++i) {
get_delegate_virtual_invoke_impl (&info, TRUE, - i * SIZEOF_VOID_P);
res = g_slist_prepend (res, info);
+ }
+ for (i = 0; i <= MAX_VIRTUAL_DELEGATE_OFFSET; ++i) {
get_delegate_virtual_invoke_impl (&info, FALSE, i * SIZEOF_VOID_P);
res = g_slist_prepend (res, info);
+ get_delegate_virtual_invoke_impl (&info, TRUE, i * SIZEOF_VOID_P);
+ res = g_slist_prepend (res, info);
}
return res;
* LOCKING: called with the domain lock held
*/
gpointer
-mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
+mono_arch_build_imt_trampoline (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
gpointer fail_tramp)
{
int i;
size += item->chunk_size;
}
if (fail_tramp)
- code = (guint8 *)mono_method_alloc_generic_virtual_thunk (domain, size);
+ code = (guint8 *)mono_method_alloc_generic_virtual_trampoline (domain, size);
else
code = (guint8 *)mono_domain_code_reserve (domain, size);
start = code;
}
if (!fail_tramp)
- mono_stats.imt_thunks_size += code - start;
+ mono_stats.imt_trampolines_size += code - start;
g_assert (code - start <= size);
mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_IMT_TRAMPOLINE, NULL);